23 files changed, 2093 insertions, 361 deletions
diff --git a/test/Transforms/GVN/PRE/pre-after-rle.ll b/test/Transforms/GVN/PRE/pre-after-rle.ll
index ff35e56a6761..879d20e891be 100644
--- a/test/Transforms/GVN/PRE/pre-after-rle.ll
+++ b/test/Transforms/GVN/PRE/pre-after-rle.ll
@@ -63,10 +63,12 @@ preheader:
   %cmp = icmp slt i32 1, %h
   br i1 %cmp, label %body, label %exit
 
-; Alias analysis currently can't figure out %width doesn't alias %s, so just
-; check that the redundant load has been removed.
+; CHECK-LABEL: preheader.body_crit_edge:
+; CHECK: load i32, i32* %width, align 8
+
 ; CHECK-LABEL: body:
 ; CHECK-NOT: load i32*, i32** %start, align 8
+; CHECK-NOT: load i32, i32* %width, align 8
 body:
   %j = phi i32 [ 0, %preheader ], [ %j.next, %body ]
   %s = load i32*, i32** %start, align 8
diff --git a/test/Transforms/GlobalOpt/globalsra-multigep.ll b/test/Transforms/GlobalOpt/globalsra-multigep.ll
new file mode 100644
index 000000000000..87a8486d8818
--- /dev/null
+++ b/test/Transforms/GlobalOpt/globalsra-multigep.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@g_data = internal unnamed_addr global <{ [8 x i16], [8 x i16] }> <{ [8 x i16] [i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16], [8 x i16] zeroinitializer }>, align 16
+; We cannot SRA here: the second GEP indexes with the variable %a1, so the access to g_data may land in either array element.
+; CHECK: @g_data = internal unnamed_addr constant <{ [8 x i16], [8 x i16] }>
+
+define i16 @test(i64 %a1) {
+entry:
+  %g1 = getelementptr inbounds <{ [8 x i16], [8 x i16] }>, <{ [8 x i16], [8 x i16] }>* @g_data, i64 0, i32 0
+  %arrayidx.i = getelementptr inbounds [8 x i16], [8 x i16]* %g1, i64 0, i64 %a1
+  %r = load i16, i16* %arrayidx.i, align 2
+  ret i16 %r
+}
diff --git a/test/Transforms/GlobalOpt/globalsra-partial.ll b/test/Transforms/GlobalOpt/globalsra-partial.ll
index 6f24128c42b5..141ee1bb5a8c 100644
--- a/test/Transforms/GlobalOpt/globalsra-partial.ll
+++ b/test/Transforms/GlobalOpt/globalsra-partial.ll
@@ -1,11 +1,12 @@
-; In this case, the global can only be broken up by one level.
+; In this case, the global cannot be split up (SRA'd) because the index i may be out of range.
 
 ; RUN: opt < %s -globalopt -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 @G = internal global { i32, [4 x float] } zeroinitializer               ; <{ i32, [4 x float] }*> [#uses=3]
 
-; CHECK-NOT: 12345
+; CHECK: @G = internal unnamed_addr global { i32, [4 x float] }
+; CHECK: 12345
 define void @onlystore() {
         store i32 12345, i32* getelementptr ({ i32, [4 x float] }, { i32, [4 x float] }* @G, i32 0, i32 0)
         ret void
diff --git a/test/Transforms/Inline/attributes.ll b/test/Transforms/Inline/attributes.ll
index c2808ba8c037..66a831bf8179 100644
--- a/test/Transforms/Inline/attributes.ll
+++ b/test/Transforms/Inline/attributes.ll
@@ -333,9 +333,10 @@ define i32 @test_no-use-jump-tables3(i32 %i) "no-jump-tables"="true" {
 ; CHECK-NEXT: ret i32
 }
 
-; Calle with "null-pointer-is-valid"="true" attribute should not be inlined
-; into a caller without this attribute. Exception: alwaysinline callee
-; can still be inlined.
+; Callee with "null-pointer-is-valid"="true" attribute should not be inlined
+; into a caller without this attribute.
+; Exception: alwaysinline callee can still be inlined but
+; "null-pointer-is-valid"="true" should get copied to caller.
 
 define i32 @null-pointer-is-valid_callee0(i32 %i) "null-pointer-is-valid"="true" {
   ret i32 %i
@@ -355,6 +356,7 @@ define i32 @null-pointer-is-valid_callee2(i32 %i)  {
 ; CHECK-NEXT: ret i32
 }
 
+; No inlining since caller does not have "null-pointer-is-valid"="true" attribute.
 define i32 @test_null-pointer-is-valid0(i32 %i) {
   %1 = call i32 @null-pointer-is-valid_callee0(i32 %i)
   ret i32 %1
@@ -363,17 +365,22 @@ define i32 @test_null-pointer-is-valid0(i32 %i) {
 ; CHECK-NEXT: ret i32
 }
 
-define i32 @test_null-pointer-is-valid1(i32 %i) {
+; alwaysinline should force inlining even when caller does not have
+; "null-pointer-is-valid"="true" attribute. However, the attribute should be
+; copied to caller.
+define i32 @test_null-pointer-is-valid1(i32 %i) "null-pointer-is-valid"="false" {
   %1 = call i32 @null-pointer-is-valid_callee1(i32 %i)
   ret i32 %1
-; CHECK: @test_null-pointer-is-valid1(
+; CHECK: @test_null-pointer-is-valid1(i32 %i) [[NULLPOINTERISVALID:#[0-9]+]] {
 ; CHECK-NEXT: ret i32
 }
 
+; Can inline since both caller and callee have "null-pointer-is-valid"="true"
+; attribute.
 define i32 @test_null-pointer-is-valid2(i32 %i) "null-pointer-is-valid"="true" {
   %1 = call i32 @null-pointer-is-valid_callee2(i32 %i)
   ret i32 %1
-; CHECK: @test_null-pointer-is-valid2(
+; CHECK: @test_null-pointer-is-valid2(i32 %i) [[NULLPOINTERISVALID]] {
 ; CHECK-NEXT: ret i32
 }
 
@@ -381,3 +388,4 @@ define i32 @test_null-pointer-is-valid2(i32 %i) "null-pointer-is-valid"="true" {
 ; CHECK: attributes [[FPMAD_TRUE]] = { "less-precise-fpmad"="true" }
 ; CHECK: attributes [[NOIMPLICITFLOAT]] = { noimplicitfloat }
 ; CHECK: attributes [[NOUSEJUMPTABLES]] = { "no-jump-tables"="true" }
+; CHECK: attributes [[NULLPOINTERISVALID]] = { "null-pointer-is-valid"="true" }
diff --git a/test/Transforms/InstCombine/and-xor-or.ll b/test/Transforms/InstCombine/and-xor-or.ll
index e4495fa5b0ae..1eb871e594cc 100644
--- a/test/Transforms/InstCombine/and-xor-or.ll
+++ b/test/Transforms/InstCombine/and-xor-or.ll
@@ -1,6 +1,101 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
+; a & (a ^ b) --> a & ~b
+
+define i32 @and_xor_common_op(i32 %pa, i32 %pb) {
+; CHECK-LABEL: @and_xor_common_op(
+; CHECK-NEXT:    [[A:%.*]] = udiv i32 42, [[PA:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = udiv i32 43, [[PB:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT:    [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %a = udiv i32 42, %pa ; thwart complexity-based canonicalization
+  %b = udiv i32 43, %pb ; thwart complexity-based canonicalization
+  %xor = xor i32 %a, %b
+  %r = and i32 %a, %xor
+  ret i32 %r
+}
+
+; a & (b ^ a) --> a & ~b
+
+define i32 @and_xor_common_op_commute1(i32 %pa, i32 %pb) {
+; CHECK-LABEL: @and_xor_common_op_commute1(
+; CHECK-NEXT:    [[A:%.*]] = udiv i32 42, [[PA:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = udiv i32 43, [[PB:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT:    [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %a = udiv i32 42, %pa ; thwart complexity-based canonicalization
+  %b = udiv i32 43, %pb ; thwart complexity-based canonicalization
+  %xor = xor i32 %b, %a
+  %r = and i32 %a, %xor
+  ret i32 %r
+}
+
+; (b ^ a) & a --> a & ~b
+
+define i32 @and_xor_common_op_commute2(i32 %pa, i32 %pb) {
+; CHECK-LABEL: @and_xor_common_op_commute2(
+; CHECK-NEXT:    [[A:%.*]] = udiv i32 42, [[PA:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = udiv i32 43, [[PB:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT:    [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %a = udiv i32 42, %pa ; thwart complexity-based canonicalization
+  %b = udiv i32 43, %pb ; thwart complexity-based canonicalization
+  %xor = xor i32 %b, %a
+  %r = and i32 %xor, %a
+  ret i32 %r
+}
+
+; (a ^ b) & a --> a & ~b
+
+define <2 x i32> @and_xor_common_op_commute3(<2 x i32> %pa, <2 x i32> %pb) {
+; CHECK-LABEL: @and_xor_common_op_commute3(
+; CHECK-NEXT:    [[A:%.*]] = udiv <2 x i32> <i32 42, i32 43>, [[PA:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = udiv <2 x i32> <i32 43, i32 42>, [[PB:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[B]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[A]], [[TMP1]]
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %a = udiv <2 x i32> <i32 42, i32 43>, %pa ; thwart complexity-based canonicalization
+  %b = udiv <2 x i32> <i32 43, i32 42>, %pb ; thwart complexity-based canonicalization
+  %xor = xor <2 x i32> %a, %b
+  %r = and <2 x i32> %xor, %a
+  ret <2 x i32> %r
+}
+
+; It's ok to match a common constant.
+; TODO: The xor should be a 'not' op (-1 constant), but demanded bits shrinks it.
+
+define <4 x i32> @and_xor_common_op_constant(<4 x i32> %A) {
+; CHECK-LABEL: @and_xor_common_op_constant(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], <i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
+  %1 = xor <4 x i32> %A, <i32 1, i32 2, i32 3, i32 4>
+  %2 = and <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %1
+  ret <4 x i32> %2
+}
+
+; a & (a ^ ~b) --> a & b
+
+define i32 @and_xor_not_common_op(i32 %a, i32 %b) {
+; CHECK-LABEL: @and_xor_not_common_op(
+; CHECK-NEXT:    [[T4:%.*]] = and i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[T4]]
+;
+  %b2 = xor i32 %b, -1
+  %t2 = xor i32 %a, %b2
+  %t4 = and i32 %t2, %a
+  ret i32 %t4
+}
+
 ; rdar://10770603
 ; (x & y) | (x ^ y) -> x | y
 
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index dde786c9b009..ec23f61cb673 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -21,18 +21,6 @@ define i32 @test3(i32 %X, i32 %Y) {
   ret i32 %b
 }
 
-; Make sure we don't go into an infinite loop with this test
-define <4 x i32> @test5(<4 x i32> %A) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> %A, <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-;
-  %1 = xor <4 x i32> %A, <i32 1, i32 2, i32 3, i32 4>
-  %2 = and <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %1
-  ret <4 x i32> %2
-}
-
 define i1 @test7(i32 %i, i1 %b) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 %i, 0
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index 5d015bc99ae9..01c1087dec69 100644
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -10,376 +11,507 @@ target triple = "x86_64-unknown-linux-gnu"
 ; PR17850: http://llvm.org/bugs/show_bug.cgi?id=17850
 
 define float @acos_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @acos(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: acos_test1
-; CHECK: call fast float @acosf(float %f)
+; CHECK-LABEL: @acos_test1(
+; CHECK-NEXT:    [[ACOSF:%.*]] = call fast float @acosf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[ACOSF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @acos(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @acos_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @acos(double %conv)
-   ret double %call
-; CHECK-LABEL: acos_test2
-; CHECK: call fast double @acos(double %conv)
+; CHECK-LABEL: @acos_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @acos(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @acos(double %conv)
+  ret double %call
 }
 
 define float @acosh_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @acosh(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: acosh_test1
-; CHECK: call fast float @acoshf(float %f)
+; CHECK-LABEL: @acosh_test1(
+; CHECK-NEXT:    [[ACOSHF:%.*]] = call fast float @acoshf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[ACOSHF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @acosh(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @acosh_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @acosh(double %conv)
-   ret double %call
-; CHECK-LABEL: acosh_test2
-; CHECK: call fast double @acosh(double %conv)
+; CHECK-LABEL: @acosh_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @acosh(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @acosh(double %conv)
+  ret double %call
 }
 
 define float @asin_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @asin(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: asin_test1
-; CHECK: call fast float @asinf(float %f)
+; CHECK-LABEL: @asin_test1(
+; CHECK-NEXT:    [[ASINF:%.*]] = call fast float @asinf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[ASINF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @asin(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @asin_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @asin(double %conv)
-   ret double %call
-; CHECK-LABEL: asin_test2
-; CHECK: call fast double @asin(double %conv)
+; CHECK-LABEL: @asin_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @asin(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @asin(double %conv)
+  ret double %call
 }
 
 define float @asinh_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @asinh(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: asinh_test1
-; CHECK: call fast float @asinhf(float %f)
+; CHECK-LABEL: @asinh_test1(
+; CHECK-NEXT:    [[ASINHF:%.*]] = call fast float @asinhf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[ASINHF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @asinh(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @asinh_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @asinh(double %conv)
-   ret double %call
-; CHECK-LABEL: asinh_test2
-; CHECK: call fast double @asinh(double %conv)
+; CHECK-LABEL: @asinh_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @asinh(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @asinh(double %conv)
+  ret double %call
 }
 
 define float @atan_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @atan(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: atan_test1
-; CHECK: call fast float @atanf(float %f)
+; CHECK-LABEL: @atan_test1(
+; CHECK-NEXT:    [[ATANF:%.*]] = call fast float @atanf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[ATANF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @atan(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @atan_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @atan(double %conv)
-   ret double %call
-; CHECK-LABEL: atan_test2
-; CHECK: call fast double @atan(double %conv)
+; CHECK-LABEL: @atan_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @atan(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @atan(double %conv)
+  ret double %call
 }
 
 define float @atanh_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @atanh(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: atanh_test1
-; CHECK: call fast float @atanhf(float %f)
+; CHECK-LABEL: @atanh_test1(
+; CHECK-NEXT:    [[ATANHF:%.*]] = call fast float @atanhf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[ATANHF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @atanh(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @atanh_test2(float %f)   {
-    %conv = fpext float %f to double
-    %call = call fast double @atanh(double %conv)
-    ret double %call
-; CHECK-LABEL: atanh_test2
-; CHECK: call fast double @atanh(double %conv)
+; CHECK-LABEL: @atanh_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @atanh(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @atanh(double %conv)
+  ret double %call
 }
 
 define float @cbrt_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @cbrt(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: cbrt_test1
-; CHECK: call fast float @cbrtf(float %f)
+; CHECK-LABEL: @cbrt_test1(
+; CHECK-NEXT:    [[CBRTF:%.*]] = call fast float @cbrtf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[CBRTF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @cbrt(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @cbrt_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast  double @cbrt(double %conv)
-   ret double %call
-; CHECK-LABEL: cbrt_test2
-; CHECK: call fast double @cbrt(double %conv)
+; CHECK-LABEL: @cbrt_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @cbrt(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast  double @cbrt(double %conv)
+  ret double %call
 }
 
 define float @exp_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @exp(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: exp_test1
-; CHECK: call fast float @expf(float %f)
+; CHECK-LABEL: @exp_test1(
+; CHECK-NEXT:    [[EXPF:%.*]] = call fast float @expf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[EXPF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @exp(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @exp_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @exp(double %conv)
-   ret double %call
-; CHECK-LABEL: exp_test2
-; CHECK: call fast double @exp(double %conv)
+; CHECK-LABEL: @exp_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @exp(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @exp(double %conv)
+  ret double %call
 }
 
 define float @expm1_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @expm1(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: expm1_test1
-; CHECK: call fast float @expm1f(float %f)
+; CHECK-LABEL: @expm1_test1(
+; CHECK-NEXT:    [[EXPM1F:%.*]] = call fast float @expm1f(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[EXPM1F]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @expm1(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @expm1_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @expm1(double %conv)
-   ret double %call
-; CHECK-LABEL: expm1_test2
-; CHECK: call fast double @expm1(double %conv)
+; CHECK-LABEL: @expm1_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @expm1(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @expm1(double %conv)
+  ret double %call
 }
 
 ; exp10f() doesn't exist for this triple, so it doesn't shrink.
 
 define float @exp10_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @exp10(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: exp10_test1
-; CHECK: call fast double @exp10(double %conv)
+; CHECK-LABEL: @exp10_test1(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @exp10(double [[CONV]])
+; CHECK-NEXT:    [[CONV1:%.*]] = fptrunc double [[CALL]] to float
+; CHECK-NEXT:    ret float [[CONV1]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @exp10(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @exp10_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @exp10(double %conv)
-   ret double %call
-; CHECK-LABEL: exp10_test2
-; CHECK: call fast double @exp10(double %conv)
+; CHECK-LABEL: @exp10_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @exp10(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @exp10(double %conv)
+  ret double %call
 }
 
 define float @log_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @log(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: log_test1
-; CHECK: call fast float @logf(float %f)
+; CHECK-LABEL: @log_test1(
+; CHECK-NEXT:    [[LOGF:%.*]] = call fast float @logf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[LOGF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @log(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @log_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @log(double %conv)
-   ret double %call
-; CHECK-LABEL: log_test2
-; CHECK: call fast double @log(double %conv)
+; CHECK-LABEL: @log_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @log(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @log(double %conv)
+  ret double %call
 }
 
 define float @log10_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @log10(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: log10_test1
-; CHECK: call fast float @log10f(float %f)
+; CHECK-LABEL: @log10_test1(
+; CHECK-NEXT:    [[LOG10F:%.*]] = call fast float @log10f(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[LOG10F]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @log10(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @log10_test2(float %f) {
-   %conv = fpext float %f to double
-   %call = call fast double @log10(double %conv)
-   ret double %call
-; CHECK-LABEL: log10_test2
-; CHECK: call fast double @log10(double %conv)
+; CHECK-LABEL: @log10_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @log10(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @log10(double %conv)
+  ret double %call
 }
 
 define float @log1p_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @log1p(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: log1p_test1
-; CHECK: call fast float @log1pf(float %f)
+; CHECK-LABEL: @log1p_test1(
+; CHECK-NEXT:    [[LOG1PF:%.*]] = call fast float @log1pf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[LOG1PF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @log1p(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @log1p_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @log1p(double %conv)
-   ret double %call
-; CHECK-LABEL: log1p_test2
-; CHECK: call fast double @log1p(double %conv)
+; CHECK-LABEL: @log1p_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @log1p(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @log1p(double %conv)
+  ret double %call
 }
 
 define float @log2_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @log2(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: log2_test1
-; CHECK: call fast float @log2f(float %f)
+; CHECK-LABEL: @log2_test1(
+; CHECK-NEXT:    [[LOG2F:%.*]] = call fast float @log2f(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[LOG2F]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @log2(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @log2_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @log2(double %conv)
-   ret double %call
-; CHECK-LABEL: log2_test2
-; CHECK: call fast double @log2(double %conv)
+; CHECK-LABEL: @log2_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @log2(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @log2(double %conv)
+  ret double %call
 }
 
 define float @logb_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @logb(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: logb_test1
-; CHECK: call fast float @logbf(float %f)
+; CHECK-LABEL: @logb_test1(
+; CHECK-NEXT:    [[LOGBF:%.*]] = call fast float @logbf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[LOGBF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @logb(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @logb_test2(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @logb(double %conv)
-   ret double %call
-; CHECK-LABEL: logb_test2
-; CHECK: call fast double @logb(double %conv)
+; CHECK-LABEL: @logb_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @logb(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @logb(double %conv)
+  ret double %call
+}
+
+; FIXME: Miscompile - the shrunken powf call drops the 2nd argument (%g)!
+
+define float @pow_test1(float %f, float %g)   {
+; CHECK-LABEL: @pow_test1(
+; CHECK-NEXT:    [[POWF:%.*]] = call fast float @powf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[POWF]]
+;
+  %df = fpext float %f to double
+  %dg = fpext float %g to double
+  %call = call fast double @pow(double %df, double %dg)
+  %fr = fptrunc double %call to float
+  ret float %fr
+}
+
+; TODO: This should shrink?
+
+define double @pow_test2(float %f, float %g) {
+; CHECK-LABEL: @pow_test2(
+; CHECK-NEXT:    [[DF:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[DG:%.*]] = fpext float [[G:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @pow(double [[DF]], double [[DG]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %df = fpext float %f to double
+  %dg = fpext float %g to double
+  %call = call fast double @pow(double %df, double %dg)
+  ret double %call
 }
 
 define float @sin_test1(float %f)   {
-   %conv = fpext float %f to double
-   %call = call fast double @sin(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: sin_test1
-; CHECK: call fast float @sinf(float %f)
+; CHECK-LABEL: @sin_test1(
+; CHECK-NEXT:    [[SINF:%.*]] = call fast float @sinf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[SINF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @sin(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @sin_test2(float %f) {
-   %conv = fpext float %f to double
-   %call = call fast double @sin(double %conv)
-   ret double %call
-; CHECK-LABEL: sin_test2
-; CHECK: call fast double @sin(double %conv)
+; CHECK-LABEL: @sin_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @sin(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @sin(double %conv)
+  ret double %call
 }
 
 define float @sqrt_test1(float %f) {
-   %conv = fpext float %f to double
-   %call = call double @sqrt(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: sqrt_test1
-; CHECK: call float @sqrtf(float %f)
+; CHECK-LABEL: @sqrt_test1(
+; CHECK-NEXT:    [[SQRTF:%.*]] = call float @sqrtf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[SQRTF]]
+;
+  %conv = fpext float %f to double
+  %call = call double @sqrt(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @sqrt_test2(float %f) {
-   %conv = fpext float %f to double
-   %call = call double @sqrt(double %conv)
-   ret double %call
-; CHECK-LABEL: sqrt_test2
-; CHECK: call double @sqrt(double %conv)
+; CHECK-LABEL: @sqrt_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call double @sqrt(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call double @sqrt(double %conv)
+  ret double %call
 }
 
 define float @sqrt_int_test1(float %f) {
-   %conv = fpext float %f to double
-   %call = call double @llvm.sqrt.f64(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: sqrt_int_test1
-; CHECK: call float @llvm.sqrt.f32(float %f)
+; CHECK-LABEL: @sqrt_int_test1(
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %conv = fpext float %f to double
+  %call = call double @llvm.sqrt.f64(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @sqrt_int_test2(float %f) {
-   %conv = fpext float %f to double
-   %call = call double @llvm.sqrt.f64(double %conv)
-   ret double %call
-; CHECK-LABEL: sqrt_int_test2
-; CHECK: call double @llvm.sqrt.f64(double %conv)
+; CHECK-LABEL: @sqrt_int_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call double @llvm.sqrt.f64(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call double @llvm.sqrt.f64(double %conv)
+  ret double %call
 }
 
 define float @tan_test1(float %f) {
-   %conv = fpext float %f to double
-   %call = call fast double @tan(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: tan_test1
-; CHECK: call fast float @tanf(float %f)
+; CHECK-LABEL: @tan_test1(
+; CHECK-NEXT:    [[TANF:%.*]] = call fast float @tanf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[TANF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @tan(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @tan_test2(float %f) {
-   %conv = fpext float %f to double
-   %call = call fast double @tan(double %conv)
-   ret double %call
-; CHECK-LABEL: tan_test2
-; CHECK: call fast double @tan(double %conv)
+; CHECK-LABEL: @tan_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @tan(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @tan(double %conv)
+  ret double %call
 }
 define float @tanh_test1(float %f) {
-   %conv = fpext float %f to double
-   %call = call fast double @tanh(double %conv)
-   %conv1 = fptrunc double %call to float
-   ret float %conv1
-; CHECK-LABEL: tanh_test1
-; CHECK: call fast float @tanhf(float %f)
+; CHECK-LABEL: @tanh_test1(
+; CHECK-NEXT:    [[TANHF:%.*]] = call fast float @tanhf(float [[F:%.*]])
+; CHECK-NEXT:    ret float [[TANHF]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @tanh(double %conv)
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
 }
 
 define double @tanh_test2(float %f) {
-   %conv = fpext float %f to double
-   %call = call fast double @tanh(double %conv)
-   ret double %call
-; CHECK-LABEL: tanh_test2
-; CHECK: call fast double @tanh(double %conv)
+; CHECK-LABEL: @tanh_test2(
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT:    [[CALL:%.*]] = call fast double @tanh(double [[CONV]])
+; CHECK-NEXT:    ret double [[CALL]]
+;
+  %conv = fpext float %f to double
+  %call = call fast double @tanh(double %conv)
+  ret double %call
 }
 
 ; 'arcp' on an fmax() is meaningless. This test just proves that
 ; flags are propagated for shrunken *binary* double FP calls.
 define float @max1(float %a, float %b) {
+; CHECK-LABEL: @max1(
+; CHECK-NEXT:    [[FMAXF:%.*]] = call arcp float @fmaxf(float [[A:%.*]], float [[B:%.*]])
+; CHECK-NEXT:    ret float [[FMAXF]]
+;
   %c = fpext float %a to double
   %d = fpext float %b to double
   %e = call arcp double @fmax(double %c, double %d)
   %f = fptrunc double %e to float
   ret float %f
-
-; CHECK-LABEL: max1(
-; CHECK-NEXT:  call arcp float @fmaxf(float %a, float %b)
-; CHECK-NEXT:  ret
 }
 
 ; A function can have a name that matches a common libcall,
 ; but with the wrong type(s). Let it be.
 
 define float @fake_fmin(float %a, float %b) {
+; CHECK-LABEL: @fake_fmin(
+; CHECK-NEXT:    [[C:%.*]] = fpext float [[A:%.*]] to fp128
+; CHECK-NEXT:    [[D:%.*]] = fpext float [[B:%.*]] to fp128
+; CHECK-NEXT:    [[E:%.*]] = call fp128 @fmin(fp128 [[C]], fp128 [[D]])
+; CHECK-NEXT:    [[F:%.*]] = fptrunc fp128 [[E]] to float
+; CHECK-NEXT:    ret float [[F]]
+;
   %c = fpext float %a to fp128
   %d = fpext float %b to fp128
   %e = call fp128 @fmin(fp128 %c, fp128 %d)
   %f = fptrunc fp128 %e to float
   ret float %f
-
-; CHECK-LABEL: fake_fmin(
-; CHECK-NEXT:  %c = fpext float %a to fp128
-; CHECK-NEXT:  %d = fpext float %b to fp128
-; CHECK-NEXT:  %e = call fp128 @fmin(fp128 %c, fp128 %d)
-; CHECK-NEXT:  %f = fptrunc fp128 %e to float
-; CHECK-NEXT:  ret float %f
 }
 
 declare fp128 @fmin(fp128, fp128) ; This is not the 'fmin' you're looking for.
@@ -389,12 +521,13 @@ declare double @fmax(double, double)
 declare double @tanh(double)
 declare double @tan(double)
 
-; sqrt is a special case: the shrinking optimization 
+; sqrt is a special case: the shrinking optimization
 ; is valid even without unsafe-fp-math.
-declare double @sqrt(double) 
-declare double @llvm.sqrt.f64(double) 
+declare double @sqrt(double)
+declare double @llvm.sqrt.f64(double)
 
 declare double @sin(double)
+declare double @pow(double, double)
 declare double @log2(double)
 declare double @log1p(double)
 declare double @log10(double)
diff --git a/test/Transforms/InstCombine/gep-addrspace.ll b/test/Transforms/InstCombine/gep-addrspace.ll
index 4a4951dee7fd..fadf2ae6bf68 100644
--- a/test/Transforms/InstCombine/gep-addrspace.ll
+++ b/test/Transforms/InstCombine/gep-addrspace.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -7,7 +8,9 @@ target triple = "x86_64-pc-win32"
 
 ; make sure that we are not crashing when creating an illegal type
 define void @func(%myStruct addrspace(1)* nocapture %p) nounwind {
-ST:
+; CHECK-LABEL: @func(
+; CHECK-NEXT:    ret void
+;
   %A = getelementptr inbounds %myStruct, %myStruct addrspace(1)* %p, i64 0
   %B = addrspacecast %myStruct addrspace(1)* %A to %myStruct*
   %C = getelementptr inbounds %myStruct, %myStruct* %B, i32 0, i32 1
@@ -21,14 +24,19 @@ ST:
 @scalar = internal addrspace(3) global float 0.000000e+00, align 4
 
 define void @keep_necessary_addrspacecast(i64 %i, float** %out0, float** %out1) {
-entry:
-; CHECK-LABEL: @keep_necessary_addrspacecast
-  %0 = getelementptr [256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @array to [256 x float]*), i64 0, i64 %i
-; CHECK: addrspacecast float addrspace(3)* %{{[0-9]+}} to float*
-  %1 = getelementptr [0 x float], [0 x float]* addrspacecast (float addrspace(3)* @scalar to [0 x float]*), i64 0, i64 %i
-; CHECK: addrspacecast float addrspace(3)* %{{[0-9]+}} to float*
-  store float* %0, float** %out0, align 4
-  store float* %1, float** %out1, align 4
+; CHECK-LABEL: @keep_necessary_addrspacecast(
+; CHECK-NEXT:    [[T01:%.*]] = getelementptr [256 x float], [256 x float] addrspace(3)* @array, i64 0, i64 [[I:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = addrspacecast float addrspace(3)* [[T01]] to float*
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float addrspace(3)* @scalar, i64 [[I]]
+; CHECK-NEXT:    [[T1:%.*]] = addrspacecast float addrspace(3)* [[TMP1]] to float*
+; CHECK-NEXT:    store float* [[T0]], float** [[OUT0:%.*]], align 4
+; CHECK-NEXT:    store float* [[T1]], float** [[OUT1:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  %t0 = getelementptr [256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @array to [256 x float]*), i64 0, i64 %i
+  %t1 = getelementptr [0 x float], [0 x float]* addrspacecast (float addrspace(3)* @scalar to [0 x float]*), i64 0, i64 %i
+  store float* %t0, float** %out0, align 4
+  store float* %t1, float** %out1, align 4
   ret void
 }
 
@@ -37,17 +45,42 @@ declare void @escape_alloca(i16*)
 ; check that addrspacecast is not ignored (leading to an assertion failure)
 ; when trying to mark a GEP as inbounds
 define { i8, i8 } @inbounds_after_addrspacecast() {
-top:
-; CHECK-LABEL: @inbounds_after_addrspacecast
-  %0 = alloca i16, align 2
-  call void @escape_alloca(i16* %0)
-  %tmpcast = bitcast i16* %0 to [2 x i8]*
-; CHECK: addrspacecast [2 x i8]* %tmpcast to [2 x i8] addrspace(11)*
-  %1 = addrspacecast [2 x i8]* %tmpcast to [2 x i8] addrspace(11)*
-; CHECK: getelementptr [2 x i8], [2 x i8] addrspace(11)* %1, i64 0, i64 1
-  %2 = getelementptr [2 x i8], [2 x i8] addrspace(11)* %1, i64 0, i64 1
-; CHECK: addrspace(11)
-  %3 = load i8, i8 addrspace(11)* %2, align 1
-  %.fca.1.insert = insertvalue { i8, i8 } zeroinitializer, i8 %3, 1
-  ret { i8, i8 } %.fca.1.insert
+; CHECK-LABEL: @inbounds_after_addrspacecast(
+; CHECK-NEXT:    [[T0:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    call void @escape_alloca(i16* nonnull [[T0]])
+; CHECK-NEXT:    [[TMPCAST:%.*]] = bitcast i16* [[T0]] to [2 x i8]*
+; CHECK-NEXT:    [[T1:%.*]] = addrspacecast [2 x i8]* [[TMPCAST]] to [2 x i8] addrspace(11)*
+; CHECK-NEXT:    [[T2:%.*]] = getelementptr [2 x i8], [2 x i8] addrspace(11)* [[T1]], i64 0, i64 1
+; CHECK-NEXT:    [[T3:%.*]] = load i8, i8 addrspace(11)* [[T2]], align 1
+; CHECK-NEXT:    [[INSERT:%.*]] = insertvalue { i8, i8 } zeroinitializer, i8 [[T3]], 1
+; CHECK-NEXT:    ret { i8, i8 } [[INSERT]]
+;
+  %t0 = alloca i16, align 2
+  call void @escape_alloca(i16* %t0)
+  %tmpcast = bitcast i16* %t0 to [2 x i8]*
+  %t1 = addrspacecast [2 x i8]* %tmpcast to [2 x i8] addrspace(11)*
+  %t2 = getelementptr [2 x i8], [2 x i8] addrspace(11)* %t1, i64 0, i64 1
+  %t3 = load i8, i8 addrspace(11)* %t2, align 1
+  %insert = insertvalue { i8, i8 } zeroinitializer, i8 %t3, 1
+  ret { i8, i8 } %insert
+}
+
+
+declare spir_func <16 x i32> @my_extern_func()
+
+; check that a bitcast is not generated when we need an addrspace cast
+define void @bitcast_after_gep(<16 x i32>* %t0) {
+; CHECK-LABEL: @bitcast_after_gep(
+; CHECK-NEXT:    [[T4:%.*]] = addrspacecast <16 x i32>* [[T0:%.*]] to <16 x i32> addrspace(3)*
+; CHECK-NEXT:    [[CALL:%.*]] = call spir_func <16 x i32> @my_extern_func()
+; CHECK-NEXT:    store <16 x i32> [[CALL]], <16 x i32> addrspace(3)* [[T4]], align 64
+; CHECK-NEXT:    ret void
+;
+  %t1 = bitcast <16 x i32>* %t0 to [16 x i32]*
+  %t2 = addrspacecast [16 x i32]* %t1 to [16 x i32] addrspace(3)*
+  %t3 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %t2, i64 0, i64 0
+  %t4 = bitcast i32 addrspace(3)* %t3 to <16 x i32> addrspace(3)*
+  %call = call spir_func <16 x i32> @my_extern_func()
+  store <16 x i32> %call, <16 x i32> addrspace(3)* %t4
+  ret void
 }
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
index eef4f76fb84f..cf24548db3e7 100644
--- a/test/Transforms/InstCombine/pow-1.ll
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -15,6 +15,8 @@
 
 declare float @powf(float, float) nounwind readonly
 declare double @pow(double, double) nounwind readonly
+declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) nounwind readonly
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) nounwind readonly
 
 ; Check pow(1.0, x) -> 1.0.
 
@@ -25,6 +27,13 @@ define float @test_simplify1(float %x) {
 ; CHECK-NEXT: ret float 1.000000e+00
 }
 
+define <2 x float> @test_simplify1v(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify1v(
+  %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 1.0, float 1.0>, <2 x float> %x)
+  ret <2 x float> %retval
+; CHECK-NEXT: ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+}
+
 define double @test_simplify2(double %x) {
 ; CHECK-LABEL: @test_simplify2(
   %retval = call double @pow(double 1.0, double %x)
@@ -32,6 +41,13 @@ define double @test_simplify2(double %x) {
 ; CHECK-NEXT: ret double 1.000000e+00
 }
 
+define <2 x double> @test_simplify2v(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify2v(
+  %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 1.0, double 1.0>, <2 x double> %x)
+  ret <2 x double> %retval
+; CHECK-NEXT: ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+}
+
 ; Check pow(2.0, x) -> exp2(x).
 
 define float @test_simplify3(float %x) {
@@ -42,6 +58,14 @@ define float @test_simplify3(float %x) {
 ; CHECK-NEXT: ret float [[EXP2F]]
 }
 
+define <2 x float> @test_simplify3v(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify3v(
+  %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 2.0, float 2.0>, <2 x float> %x)
+; CHECK-NEXT: [[EXP2F:%[a-z0-9]+]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> %x)
+  ret <2 x float> %retval
+; CHECK-NEXT: ret <2 x float> [[EXP2F]]
+}
+
 define double @test_simplify4(double %x) {
 ; CHECK-LABEL: @test_simplify4(
   %retval = call double @pow(double 2.0, double %x)
@@ -50,6 +74,14 @@ define double @test_simplify4(double %x) {
 ; CHECK-NEXT: ret double [[EXP2]]
 }
 
+define <2 x double> @test_simplify4v(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify4v(
+  %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 2.0, double 2.0>, <2 x double> %x)
+; CHECK-NEXT: [[EXP2:%[a-z0-9]+]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> %x)
+  ret <2 x double> %retval
+; CHECK-NEXT: ret <2 x double> [[EXP2]]
+}
+
 ; Check pow(x, 0.0) -> 1.0.
 
 define float @test_simplify5(float %x) {
@@ -59,6 +91,13 @@ define float @test_simplify5(float %x) {
 ; CHECK-NEXT: ret float 1.000000e+00
 }
 
+define <2 x float> @test_simplify5v(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify5v(
+  %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 0.0, float 0.0>)
+  ret <2 x float> %retval
+; CHECK-NEXT: %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> zeroinitializer)
+}
+
 define double @test_simplify6(double %x) {
 ; CHECK-LABEL: @test_simplify6(
   %retval = call double @pow(double %x, double 0.0)
@@ -66,6 +105,13 @@ define double @test_simplify6(double %x) {
 ; CHECK-NEXT: ret double 1.000000e+00
 }
 
+define <2 x double> @test_simplify6v(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify6v(
+  %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 0.0, double 0.0>)
+  ret <2 x double> %retval
+; CHECK-NEXT: %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> zeroinitializer)
+}
+
 ; Check pow(x, 0.5) -> fabs(sqrt(x)), where x != -infinity.
 
 define float @test_simplify7(float %x) {
@@ -115,6 +161,13 @@ define float @test_simplify11(float %x) {
 ; CHECK-NEXT: ret float %x
 }
 
+define <2 x float> @test_simplify11v(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify11v(
+  %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 1.0, float 1.0>)
+  ret <2 x float> %retval
+; CHECK-NEXT: %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 1.000000e+00, float 1.000000e+00>)
+}
+
 define double @test_simplify12(double %x) {
 ; CHECK-LABEL: @test_simplify12(
   %retval = call double @pow(double %x, double 1.0)
@@ -122,6 +175,13 @@ define double @test_simplify12(double %x) {
 ; CHECK-NEXT: ret double %x
 }
 
+define <2 x double> @test_simplify12v(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify12v(
+  %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 1.0, double 1.0>)
+  ret <2 x double> %retval
+; CHECK-NEXT: %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 1.000000e+00, double 1.000000e+00>)
+}
+
 ; Check pow(x, 2.0) -> x*x.
 
 define float @pow2_strict(float %x) {
@@ -133,6 +193,15 @@ define float @pow2_strict(float %x) {
   ret float %r
 }
 
+define <2 x float> @pow2_strictv(<2 x float> %x) {
+; CHECK-LABEL: @pow2_strictv(
+; CHECK-NEXT:    [[POW2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 2.000000e+00, float 2.000000e+00>)
+; CHECK-NEXT:    ret <2 x float> [[POW2]]
+;
+  %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 2.0, float 2.0>)
+  ret <2 x float> %r
+}
+
 define double @pow2_double_strict(double %x) {
 ; CHECK-LABEL: @pow2_double_strict(
 ; CHECK-NEXT:    [[POW2:%.*]] = fmul double %x, %x
@@ -141,6 +210,14 @@ define double @pow2_double_strict(double %x) {
   %r = call double @pow(double %x, double 2.0)
   ret double %r
 }
+define <2 x double> @pow2_double_strictv(<2 x double> %x) {
+; CHECK-LABEL: @pow2_double_strictv(
+; CHECK-NEXT:    [[POW2:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 2.000000e+00, double 2.000000e+00>)
+; CHECK-NEXT:    ret <2 x double> [[POW2]]
+;
+  %r = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 2.0, double 2.0>)
+  ret <2 x double> %r
+}
 
 ; Don't drop the FMF - PR35601 ( https://bugs.llvm.org/show_bug.cgi?id=35601 )
 
@@ -164,6 +241,15 @@ define float @pow_neg1_strict(float %x) {
   ret float %r
 }
 
+define <2 x float> @pow_neg1_strictv(<2 x float> %x) {
+; CHECK-LABEL: @pow_neg1_strictv(
+; CHECK-NEXT:    [[POWRECIP:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float -1.000000e+00, float -1.000000e+00>)
+; CHECK-NEXT:    ret <2 x float> [[POWRECIP]]
+;
+  %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float -1.0, float -1.0>)
+  ret <2 x float> %r
+}
+
 define double @pow_neg1_double_fast(double %x) {
 ; CHECK-LABEL: @pow_neg1_double_fast(
 ; CHECK-NEXT:    [[POWRECIP:%.*]] = fdiv fast double 1.000000e+00, %x
@@ -173,6 +259,15 @@ define double @pow_neg1_double_fast(double %x) {
   ret double %r
 }
 
+define <2 x double> @pow_neg1_double_fastv(<2 x double> %x) {
+; CHECK-LABEL: @pow_neg1_double_fastv(
+; CHECK-NEXT:    [[POWRECIP:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -1.000000e+00, double -1.000000e+00>)
+; CHECK-NEXT:    ret <2 x double> [[POWRECIP]]
+;
+  %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -1.0, double -1.0>)
+  ret <2 x double> %r
+}
+
 declare double @llvm.pow.f64(double %Val, double %Power)
 define double @test_simplify17(double %x) {
 ; CHECK-LABEL: @test_simplify17(
diff --git a/test/Transforms/InstCombine/pow-cbrt.ll b/test/Transforms/InstCombine/pow-cbrt.ll
new file mode 100644
index 000000000000..00fa510b04e6
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-cbrt.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @pow_intrinsic_third_fast(double %x) {
+; CHECK-LABEL: @pow_intrinsic_third_fast(
+; CHECK-NEXT:    [[POW:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 0x3FD5555555555555)
+; CHECK-NEXT:    ret double [[POW]]
+;
+  %pow = call fast double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
+  ret double %pow
+}
+
+define float @powf_intrinsic_third_fast(float %x) {
+; CHECK-LABEL: @powf_intrinsic_third_fast(
+; CHECK-NEXT:    [[POW:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float 0x3FD5555560000000)
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %pow = call fast float @llvm.pow.f32(float %x, float 0x3fd5555560000000)
+  ret float %pow
+}
+
+define double @pow_intrinsic_third_approx(double %x) {
+; CHECK-LABEL: @pow_intrinsic_third_approx(
+; CHECK-NEXT:    [[POW:%.*]] = call afn double @llvm.pow.f64(double [[X:%.*]], double 0x3FD5555555555555)
+; CHECK-NEXT:    ret double [[POW]]
+;
+  %pow = call afn double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
+  ret double %pow
+}
+
+define float @powf_intrinsic_third_approx(float %x) {
+; CHECK-LABEL: @powf_intrinsic_third_approx(
+; CHECK-NEXT:    [[POW:%.*]] = call afn float @llvm.pow.f32(float [[X:%.*]], float 0x3FD5555560000000)
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %pow = call afn float @llvm.pow.f32(float %x, float 0x3fd5555560000000)
+  ret float %pow
+}
+
+define double @pow_libcall_third_fast(double %x) {
+; CHECK-LABEL: @pow_libcall_third_fast(
+; CHECK-NEXT:    [[POW:%.*]] = call fast double @pow(double [[X:%.*]], double 0x3FD5555555555555)
+; CHECK-NEXT:    ret double [[POW]]
+;
+  %pow = call fast double @pow(double %x, double 0x3fd5555555555555)
+  ret double %pow
+}
+
+define float @powf_libcall_third_fast(float %x) {
+; CHECK-LABEL: @powf_libcall_third_fast(
+; CHECK-NEXT:    [[POW:%.*]] = call fast float @powf(float [[X:%.*]], float 0x3FD5555560000000)
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %pow = call fast float @powf(float %x, float 0x3fd5555560000000)
+  ret float %pow
+}
+
+define double @pow_intrinsic_negthird_fast(double %x) {
+; CHECK-LABEL: @pow_intrinsic_negthird_fast(
+; CHECK-NEXT:    [[POW:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 0xBFD5555555555555)
+; CHECK-NEXT:    ret double [[POW]]
+;
+  %pow = call fast double @llvm.pow.f64(double %x, double 0xbfd5555555555555)
+  ret double %pow
+}
+
+define float @powf_intrinsic_negthird_fast(float %x) {
+; CHECK-LABEL: @powf_intrinsic_negthird_fast(
+; CHECK-NEXT:    [[POW:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float 0xBFD5555560000000)
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %pow = call fast float @llvm.pow.f32(float %x, float 0xbfd5555560000000)
+  ret float %pow
+}
+
+define double @pow_intrinsic_negthird_approx(double %x) {
+; CHECK-LABEL: @pow_intrinsic_negthird_approx(
+; CHECK-NEXT:    [[POW:%.*]] = call afn double @llvm.pow.f64(double [[X:%.*]], double 0xBFD5555555555555)
+; CHECK-NEXT:    ret double [[POW]]
+;
+  %pow = call afn double @llvm.pow.f64(double %x, double 0xbfd5555555555555)
+  ret double %pow
+}
+
+define float @powf_intrinsic_negthird_approx(float %x) {
+; CHECK-LABEL: @powf_intrinsic_negthird_approx(
+; CHECK-NEXT:    [[POW:%.*]] = call afn float @llvm.pow.f32(float [[X:%.*]], float 0xBFD5555560000000)
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %pow = call afn float @llvm.pow.f32(float %x, float 0xbfd5555560000000)
+  ret float %pow
+}
+
+define double @pow_libcall_negthird_fast(double %x) {
+; CHECK-LABEL: @pow_libcall_negthird_fast(
+; CHECK-NEXT:    [[POW:%.*]] = call fast double @pow(double [[X:%.*]], double 0xBFD5555555555555)
+; CHECK-NEXT:    ret double [[POW]]
+;
+  %pow = call fast double @pow(double %x, double 0xbfd5555555555555)
+  ret double %pow
+}
+
+define float @powf_libcall_negthird_fast(float %x) {
+; CHECK-LABEL: @powf_libcall_negthird_fast(
+; CHECK-NEXT:    [[POW:%.*]] = call fast float @powf(float [[X:%.*]], float 0xBFD5555560000000)
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %pow = call fast float @powf(float %x, float 0xbfd5555560000000)
+  ret float %pow
+}
+
+declare double @llvm.pow.f64(double, double) #0
+declare float @llvm.pow.f32(float, float) #0
+declare double @pow(double, double)
+declare float @powf(float, float)
+
+attributes #0 = { nounwind readnone speculatable }
diff --git a/test/Transforms/InstCombine/pow-sqrt.ll b/test/Transforms/InstCombine/pow-sqrt.ll
index c07a82ccedda..3b885ad5bdae 100644
--- a/test/Transforms/InstCombine/pow-sqrt.ll
+++ b/test/Transforms/InstCombine/pow-sqrt.ll
@@ -20,9 +20,9 @@ define <2 x double> @pow_intrinsic_half_approx(<2 x double> %x) {
 
 define double @pow_libcall_half_approx(double %x) {
 ; CHECK-LABEL: @pow_libcall_half_approx(
-; CHECK-NEXT:    [[SQRT:%.*]] = call double @sqrt(double %x)
-; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[SQRT]])
-; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq double %x, 0xFFF0000000000000
+; CHECK-NEXT:    [[SQRT:%.*]] = call afn double @sqrt(double %x)
+; CHECK-NEXT:    [[TMP1:%.*]] = call afn double @llvm.fabs.f64(double [[SQRT]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp afn oeq double %x, 0xFFF0000000000000
 ; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], double 0x7FF0000000000000, double [[TMP1]]
 ; CHECK-NEXT:    ret double [[TMP3]]
 ;
diff --git a/test/Transforms/InstCombine/select-binop-icmp.ll b/test/Transforms/InstCombine/select-binop-icmp.ll
new file mode 100644
index 000000000000..a2ac68fa21e3
--- /dev/null
+++ b/test/Transforms/InstCombine/select-binop-icmp.ll
@@ -0,0 +1,391 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @use(<2 x i1>)
+
+define i32 @select_xor_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 0
+  %B = xor i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_xor_icmp2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp2(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp ne i32 %x, 0
+  %B = xor i32 %x, %z
+  %C = select i1 %A, i32 %y, i32 %B
+  ret i32 %C
+}
+
+define i32 @select_xor_icmp_meta(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp_meta(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]], !prof !0
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 0
+  %B = xor i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y, !prof !0
+  ret i32 %C
+}
+
+define i32 @select_mul_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_mul_icmp(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 1
+  %B = mul i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_add_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_add_icmp(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 0
+  %B = add i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_or_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 0
+  %B = or i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_and_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, -1
+  %B = and i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define <2 x i8> @select_xor_icmp_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_xor_icmp_vec(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i8> [[C]]
+;
+  %A = icmp eq <2 x i8>  %x, <i8 0, i8 0>
+  %B = xor <2 x i8>  %x, %z
+  %C = select <2 x i1>  %A, <2 x i8>  %B, <2 x i8>  %y
+  ret <2 x i8>  %C
+}
+
+define <2 x i8> @select_xor_icmp_vec_use(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_xor_icmp_vec_use(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    call void @use(<2 x i1> [[A]])
+; CHECK-NEXT:    [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Y:%.*]], <2 x i8> [[Z:%.*]]
+; CHECK-NEXT:    ret <2 x i8> [[C]]
+;
+  %A = icmp ne <2 x i8>  %x, <i8 0, i8 0>
+  call void @use(<2 x i1> %A)
+  %B = xor <2 x i8>  %x, %z
+  %C = select <2 x i1>  %A, <2 x i8>  %y, <2 x i8>  %B
+  ret <2 x i8>  %C
+}
+
+define i32 @select_xor_inv_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_inv_icmp(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 0
+  %B = xor i32 %z, %x
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_xor_inv_icmp2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_inv_icmp2(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp ne i32 %x, 0
+  %B = xor i32 %z, %x ; commuted operands, unlike @select_xor_icmp2
+  %C = select i1 %A, i32 %y, i32 %B
+  ret i32 %C
+}
+
+; Negative tests
+define i32 @select_xor_icmp_bad_1(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_xor_icmp_bad_1(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, %k
+  %B = xor i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_2(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_xor_icmp_bad_2(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[B:%.*]] = xor i32 [[K:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 0
+  %B = xor i32 %k, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp_bad_3(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 3
+  %B = xor i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_4(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_xor_icmp_bad_4(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, %k
+  %B = xor i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_5(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp_bad_5(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[Y:%.*]], i32 [[B]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp ne i32 %x, 0
+  %B = xor i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_6(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp_bad_6(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp ne i32 %x, 1
+  %B = xor i32 %x, %z
+  %C = select i1 %A, i32 %y, i32 %B
+  ret i32 %C
+}
+
+define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_xor_icmp_vec_bad(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 5, i8 3>
+; CHECK-NEXT:    [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i8> [[C]]
+;
+  %A = icmp eq <2 x i8>  %x, <i8 5, i8 3>
+  %B = xor <2 x i8>  %x, %z
+  %C = select <2 x i1>  %A, <2 x i8>  %B, <2 x i8>  %y
+  ret <2 x i8>  %C
+}
+
+; TODO: Support undef vector elements; the identity-constant check does not handle them yet.
+define <2 x i8> @select_xor_icmp_vec_bad_2(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_xor_icmp_vec_bad_2(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 0, i8 undef>
+; CHECK-NEXT:    [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i8> [[C]]
+;
+  %A = icmp eq <2 x i8>  %x, <i8 0, i8 undef>
+  %B = xor <2 x i8>  %x, %z
+  %C = select <2 x i1>  %A, <2 x i8>  %B, <2 x i8>  %y
+  ret <2 x i8>  %C
+}
+
+define i32 @select_mul_icmp_bad(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_mul_icmp_bad(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[B:%.*]] = mul i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 3
+  %B = mul i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_add_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_add_icmp_bad(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[B:%.*]] = add i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 1
+  %B = add i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_and_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_bad(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[B:%.*]] = and i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 0
+  %B = and i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_or_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_bad(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[B:%.*]] = or i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 3
+  %B = or i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+; TODO: Support for non-commutative opcodes
+define i32 @select_sub_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_sub_icmp(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[B:%.*]] = sub i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 0
+  %B = sub i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_shl_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_shl_icmp(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[B:%.*]] = shl i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 0
+  %B = shl i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_lshr_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_lshr_icmp(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[B:%.*]] = lshr i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 0
+  %B = lshr i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+define i32 @select_ashr_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_ashr_icmp(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[B:%.*]] = ashr i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = icmp eq i32 %x, 0
+  %B = ashr i32 %x, %z
+  %C = select i1 %A, i32 %B, i32 %y
+  ret i32 %C
+}
+
+; TODO: Support for FP opcodes
+define float @select_fadd_icmp(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_icmp(
+; CHECK-NEXT:    [[A:%.*]] = fcmp oeq float [[X:%.*]], -0.000000e+00
+; CHECK-NEXT:    [[B:%.*]] = fadd float [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT:    ret float [[C]]
+;
+  %A = fcmp oeq float %x, -0.0
+  %B = fadd float %x, %z
+  %C = select i1 %A, float %B, float %y
+  ret float %C
+}
+
+define float @select_fadd_icmp2(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_icmp2(
+; CHECK-NEXT:    [[A:%.*]] = fcmp ueq float [[X:%.*]], -0.000000e+00
+; CHECK-NEXT:    [[B:%.*]] = fadd float [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT:    ret float [[C]]
+;
+  %A = fcmp ueq float %x, -0.0
+  %B = fadd float %x, %z
+  %C = select i1 %A, float %B, float %y
+  ret float %C
+}
+
+define float @select_fmul_icmp(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fmul_icmp(
+; CHECK-NEXT:    [[A:%.*]] = fcmp oeq float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT:    [[B:%.*]] = fmul float [[X]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT:    ret float [[C]]
+;
+  %A = fcmp oeq float %x, 1.0
+  %B = fmul float %x, %z
+  %C = select i1 %A, float %B, float %y
+  ret float %C
+}
+
+!0 = !{!"branch_weights", i32 2, i32 10}
diff --git a/test/Transforms/InstCombine/sub-not.ll b/test/Transforms/InstCombine/sub-not.ll
index 5fc42367dad4..cd1f8f3bd52d 100644
--- a/test/Transforms/InstCombine/sub-not.ll
+++ b/test/Transforms/InstCombine/sub-not.ll
@@ -40,8 +40,8 @@ define <2 x i8> @sub_not_vec(<2 x i8> %x, <2 x i8> %y) {
 
 define i8 @dec_sub(i8 %x, i8 %y) {
 ; CHECK-LABEL: @dec_sub(
-; CHECK-NEXT:    [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = add i8 [[S]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %s = sub i8 %x, %y
@@ -64,8 +64,8 @@ define i8 @dec_sub_extra_use(i8 %x, i8 %y) {
 
 define <2 x i8> @dec_sub_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @dec_sub_vec(
-; CHECK-NEXT:    [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[S]], <i8 -1, i8 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %s = sub <2 x i8> %x, %y
@@ -75,8 +75,8 @@ define <2 x i8> @dec_sub_vec(<2 x i8> %x, <2 x i8> %y) {
 
 define i8 @sub_inc(i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_inc(
-; CHECK-NEXT:    [[S:%.*]] = add i8 [[X:%.*]], 1
-; CHECK-NEXT:    [[R:%.*]] = sub i8 [[Y:%.*]], [[S]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[TMP1]], [[Y:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %s = add i8 %x, 1
@@ -99,8 +99,8 @@ define i8 @sub_inc_extra_use(i8 %x, i8 %y) {
 
 define <2 x i8> @sub_inc_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @sub_inc_vec(
-; CHECK-NEXT:    [[S:%.*]] = add <2 x i8> [[X:%.*]], <i8 undef, i8 1>
-; CHECK-NEXT:    [[R:%.*]] = sub <2 x i8> [[Y:%.*]], [[S]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[TMP1]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %s = add <2 x i8> %x, <i8 undef, i8 1>
@@ -108,3 +108,38 @@ define <2 x i8> @sub_inc_vec(<2 x i8> %x, <2 x i8> %y) {
   ret <2 x i8> %r
 }
 
+define i8 @sub_dec(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_dec(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[Y:%.*]], -1
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %s = add i8 %x, -1
+  %r = sub i8 %s, %y
+  ret i8 %r
+}
+
+define i8 @sub_dec_extra_use(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_dec_extra_use(
+; CHECK-NEXT:    [[S:%.*]] = add i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[R:%.*]] = sub i8 [[S]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use(i8 [[S]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %s = add i8 %x, -1
+  %r = sub i8 %s, %y
+  call void @use(i8 %s)
+  ret i8 %r
+}
+
+define <2 x i8> @sub_dec_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sub_dec_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %s = add <2 x i8> %x, <i8 undef, i8 -1>
+  %r = sub <2 x i8> %s, %y
+  ret <2 x i8> %r
+}
+
diff --git a/test/Transforms/InstCombine/xor.ll b/test/Transforms/InstCombine/xor.ll
index 0724af1f4221..2fc5270587fc 100644
--- a/test/Transforms/InstCombine/xor.ll
+++ b/test/Transforms/InstCombine/xor.ll
@@ -328,17 +328,6 @@ define i32 @test25(i32 %g, i32 %h) {
   ret i32 %t4
 }
 
-define i32 @test26(i32 %a, i32 %b) {
-; CHECK-LABEL: @test26(
-; CHECK-NEXT:    [[T4:%.*]] = and i32 %b, %a
-; CHECK-NEXT:    ret i32 [[T4]]
-;
-  %b2 = xor i32 %b, -1
-  %t2 = xor i32 %a, %b2
-  %t4 = and i32 %t2, %a
-  ret i32 %t4
-}
-
 define i32 @test27(i32 %b, i32 %c, i32 %d) {
 ; CHECK-LABEL: @test27(
 ; CHECK-NEXT:    [[T6:%.*]] = icmp eq i32 %b, %c
diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll
index 251b4dea63b5..ed68f1121278 100644
--- a/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/test/Transforms/InstSimplify/AndOrXor.ll
@@ -999,28 +999,26 @@ define i64 @shl_or_and2(i32 %a, i1 %b) {
   ret i64 %tmp5
 }
 
-define i32 @shl_or_and3(i32 %a, i32 %b) {
 ; concatinate two 32-bit integers and extract lower 32-bit
+define i64 @shl_or_and3(i32 %a, i32 %b) {
 ; CHECK-LABEL: @shl_or_and3(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 4294967295
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; CHECK-NEXT:    ret i32 [[TMP6]]
+; CHECK-NEXT:    ret i64 [[TMP5]]
 ;
   %tmp1 = zext i32 %a to i64
   %tmp2 = zext i32 %b to i64
   %tmp3 = shl nuw i64 %tmp1, 32
   %tmp4 = or i64 %tmp2, %tmp3
   %tmp5 = and i64 %tmp4, 4294967295
-  %tmp6 = trunc i64 %tmp5 to i32
-  ret i32 %tmp6
+  ret i64 %tmp5
 }
 
-define i32 @shl_or_and4(i16 %a, i16 %b) {
 ; concatinate two 16-bit integers and extract higher 16-bit
+define i32 @shl_or_and4(i16 %a, i16 %b) {
 ; CHECK-LABEL: @shl_or_and4(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
@@ -1037,27 +1035,25 @@ define i32 @shl_or_and4(i16 %a, i16 %b) {
   ret i32 %tmp5
 }
 
-define i64 @shl_or_and5(i64 %a, i1 %b) {
+define i128 @shl_or_and5(i64 %a, i1 %b) {
 ; CHECK-LABEL: @shl_or_and5(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i64 [[A:%.*]] to i128
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[B:%.*]] to i128
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i128 [[TMP1]], 64
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i128 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i128 [[TMP4]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
-; CHECK-NEXT:    ret i64 [[TMP6]]
+; CHECK-NEXT:    ret i128 [[TMP5]]
 ;
   %tmp1 = zext i64 %a to i128
   %tmp2 = zext i1 %b to i128
   %tmp3 = shl nuw i128 %tmp1, 64
   %tmp4 = or i128 %tmp2, %tmp3
   %tmp5 = and i128 %tmp4, 1
-  %tmp6 = trunc i128 %tmp5 to i64
-  ret i64 %tmp6
+  ret i128 %tmp5
 }
 
+; A variation of above test cases; it fails due to the mask value
 define i32 @shl_or_and6(i16 %a, i16 %b) {
-; A variation of above test case, but fails due to the mask value
 ; CHECK-LABEL: @shl_or_and6(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
@@ -1074,8 +1070,8 @@ define i32 @shl_or_and6(i16 %a, i16 %b) {
   ret i32 %tmp5
 }
 
+; A variation of above test cases; it fails due to the mask value
 define i32 @shl_or_and7(i16 %a, i16 %b) {
-; A variation of above test case, but fails due to the mask value
 ; CHECK-LABEL: @shl_or_and7(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
@@ -1092,8 +1088,8 @@ define i32 @shl_or_and7(i16 %a, i16 %b) {
   ret i32 %tmp5
 }
 
+; A variation of above test cases; it fails due to the mask value
 define i32 @shl_or_and8(i16 %a, i16 %b) {
-; A variation of above test case, but fails due to the mask value
 ; CHECK-LABEL: @shl_or_and8(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
@@ -1109,3 +1105,55 @@ define i32 @shl_or_and8(i16 %a, i16 %b) {
   %tmp5 = and i32 %tmp4, 131071 ; mask with 0x1FFFF
   ret i32 %tmp5
 }
+
+define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) {
+; CHECK-LABEL: @shl_or_and1v(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 1, i64 1>
+; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
+;
+  %tmp1 = zext <2 x i32> %a to <2 x i64>
+  %tmp2 = zext <2 x i1> %b to <2 x i64>
+  %tmp3 = shl nuw <2 x i64> %tmp1, <i64 32, i64 32>
+  %tmp4 = or <2 x i64> %tmp3, %tmp2
+  %tmp5 = and <2 x i64> %tmp4, <i64 1, i64 1>
+  ret <2 x i64> %tmp5
+}
+
+define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) {
+; CHECK-LABEL: @shl_or_and2v(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 4294967296, i64 4294967296>
+; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
+;
+  %tmp1 = zext <2 x i1> %b to <2 x i64>
+  %tmp2 = zext <2 x i32> %a to <2 x i64>
+  %tmp3 = shl nuw <2 x i64> %tmp1, <i64 32, i64 32>
+  %tmp4 = or <2 x i64> %tmp2, %tmp3
+  %tmp5 = and <2 x i64> %tmp4, <i64 4294967296, i64 4294967296>
+  ret <2 x i64> %tmp5
+}
+
+; A variation of above test cases; it fails due to the mask value
+define <2 x i32> @shl_or_and3v(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: @shl_or_and3v(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i16> [[B:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and <2 x i32> [[TMP4]], <i32 -65535, i32 -65535>
+; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
+;
+  %tmp1 = zext <2 x i16> %a to <2 x i32>
+  %tmp2 = zext <2 x i16> %b to <2 x i32>
+  %tmp3 = shl nuw <2 x i32> %tmp1, <i32 16, i32 16>
+  %tmp4 = or <2 x i32> %tmp2, %tmp3
+  %tmp5 = and <2 x i32> %tmp4, <i32 4294901761, i32 4294901761> ; mask with 0xFFFF0001
+  ret <2 x i32> %tmp5
+}
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
index 080d3ed22219..1e581dd4d7c7 100644
--- a/test/Transforms/InstSimplify/call.ll
+++ b/test/Transforms/InstSimplify/call.ll
@@ -431,22 +431,72 @@ declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8
 declare double @llvm.powi.f64(double, i32)
 declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32)
 
-define double @constant_fold_powi() nounwind uwtable ssp {
+define double @constant_fold_powi() {
 ; CHECK-LABEL: @constant_fold_powi(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret double 9.000000e+00
 ;
-entry:
-  %0 = call double @llvm.powi.f64(double 3.00000e+00, i32 2)
-  ret double %0
+  %t0 = call double @llvm.powi.f64(double 3.00000e+00, i32 2)
+  ret double %t0
 }
 
-define <2 x double> @constant_fold_powi_vec() nounwind uwtable ssp {
+define <2 x double> @constant_fold_powi_vec() {
 ; CHECK-LABEL: @constant_fold_powi_vec(
-; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret <2 x double> <double 9.000000e+00, double 2.500000e+01>
 ;
-entry:
-  %0 = call <2 x double> @llvm.powi.v2f64(<2 x double> <double 3.00000e+00, double 5.00000e+00>, i32 2)
-  ret <2 x double> %0
+  %t0 = call <2 x double> @llvm.powi.v2f64(<2 x double> <double 3.00000e+00, double 5.00000e+00>, i32 2)
+  ret <2 x double> %t0
+}
+
+declare i8 @llvm.fshl.i8(i8, i8, i8)
+declare i9 @llvm.fshr.i9(i9, i9, i9)
+declare <2 x i7> @llvm.fshl.v2i7(<2 x i7>, <2 x i7>, <2 x i7>)
+declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>)
+
+define i8 @fshl_no_shift(i8 %x, i8 %y) {
+; CHECK-LABEL: @fshl_no_shift(
+; CHECK-NEXT:    ret i8 [[X:%.*]]
+;
+  %z = call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 0)
+  ret i8 %z
+}
+
+define i9 @fshr_no_shift(i9 %x, i9 %y) {
+; CHECK-LABEL: @fshr_no_shift(
+; CHECK-NEXT:    ret i9 [[Y:%.*]]
+;
+  %z = call i9 @llvm.fshr.i9(i9 %x, i9 %y, i9 0)
+  ret i9 %z
+}
+
+define i8 @fshl_no_shift_modulo_bitwidth(i8 %x, i8 %y) {
+; CHECK-LABEL: @fshl_no_shift_modulo_bitwidth(
+; CHECK-NEXT:    ret i8 [[X:%.*]]
+;
+  %z = call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 40)
+  ret i8 %z
+}
+
+define i9 @fshr_no_shift_modulo_bitwidth(i9 %x, i9 %y) {
+; CHECK-LABEL: @fshr_no_shift_modulo_bitwidth(
+; CHECK-NEXT:    ret i9 [[Y:%.*]]
+;
+  %z = call i9 @llvm.fshr.i9(i9 %x, i9 %y, i9 189)
+  ret i9 %z
+}
+
+define <2 x i7> @fshl_no_shift_modulo_bitwidth_splat(<2 x i7> %x, <2 x i7> %y) {
+; CHECK-LABEL: @fshl_no_shift_modulo_bitwidth_splat(
+; CHECK-NEXT:    ret <2 x i7> [[X:%.*]]
+;
+  %z = call <2 x i7> @llvm.fshl.v2i7(<2 x i7> %x, <2 x i7> %y, <2 x i7> <i7 21, i7 21>)
+  ret <2 x i7> %z
+}
+
+define <2 x i8> @fshr_no_shift_modulo_bitwidth_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @fshr_no_shift_modulo_bitwidth_splat(
+; CHECK-NEXT:    ret <2 x i8> [[Y:%.*]]
+;
+  %z = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> <i8 72, i8 72>)
+  ret <2 x i8> %z
 }
+
diff --git a/test/Transforms/InstSimplify/select-and-cmp.ll b/test/Transforms/InstSimplify/select-and-cmp.ll
new file mode 100644
index 000000000000..7153972c79c8
--- /dev/null
+++ b/test/Transforms/InstSimplify/select-and-cmp.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i32 @select_and_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define <2 x i8> @select_and_icmp_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_and_icmp_vec(
+; CHECK-NEXT:    ret <2 x i8> [[X:%.*]]
+;
+  %A = icmp eq <2 x i8> %x, %z
+  %B = icmp eq <2 x i8> %y, %z
+  %C = and <2 x i1> %A, %B
+  %D = select <2 x i1> %C, <2 x i8> %z, <2 x i8> %x
+  ret <2 x i8> %D
+}
+
+define i32 @select_and_icmp2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp2(
+; CHECK-NEXT:    ret i32 [[Y:%.*]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %y
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_alt(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_alt(
+; CHECK-NEXT:    ret i32 [[Z:%.*]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_alt2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_alt2(
+; CHECK-NEXT:    ret i32 [[Z:%.*]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %y, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_inv_alt(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_inv_alt(
+; CHECK-NEXT:    ret i32 [[Z:%.*]]
+;
+  %A = icmp eq i32 %z, %x
+  %B = icmp eq i32 %z, %y
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_and_inv_icmp_alt(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_inv_icmp_alt(
+; CHECK-NEXT:    ret i32 [[Z:%.*]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %B, %A
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_and_inv_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_inv_icmp(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %B , %A
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define <2 x i8> @select_and_icmp_alt_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_and_icmp_alt_vec(
+; CHECK-NEXT:    ret <2 x i8> [[Z:%.*]]
+;
+  %A = icmp eq <2 x i8> %x, %z
+  %B = icmp eq <2 x i8> %y, %z
+  %C = and <2 x i1> %A, %B
+  %D = select <2 x i1> %C, <2 x i8> %x, <2 x i8> %z
+  ret <2 x i8> %D
+}
+
+
+define i32 @select_and_icmp_inv(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_inv(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %A = icmp eq i32 %z, %x
+  %B = icmp eq i32 %z, %y
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+; Negative tests
+define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_1(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_2(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_3(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_4(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_bad_true_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_bad_true_val(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[K:%.*]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %k, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_bad_false_val(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[K:%.*]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %k
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_bad_op(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_bad_op(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[K:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %k, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_bad_op_2(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_bad_op_2(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %k
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_1(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_2(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_3(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_4(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_4(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_5(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_5(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %k
+  %B = icmp eq i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_true_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_true_val(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[K:%.*]], i32 [[Z]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %k, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_alt_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_and_icmp_alt_bad_false_val(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[K:%.*]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %k
+  ret i32 %D
+}
diff --git a/test/Transforms/InstSimplify/select-or-cmp.ll b/test/Transforms/InstSimplify/select-or-cmp.ll
new file mode 100644
index 000000000000..ea29bff7d1c4
--- /dev/null
+++ b/test/Transforms/InstSimplify/select-or-cmp.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i32 @select_or_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp(
+; CHECK-NEXT:    ret i32 [[Z:%.*]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define <2 x i8> @select_or_icmp_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_or_icmp_vec(
+; CHECK-NEXT:    ret <2 x i8> [[Z:%.*]]
+;
+  %A = icmp ne <2 x i8> %x, %z
+  %B = icmp ne <2 x i8> %y, %z
+  %C = or <2 x i1> %A, %B
+  %D = select <2 x i1> %C, <2 x i8> %z, <2 x i8> %x
+  ret <2 x i8> %D
+}
+
+define i32 @select_or_icmp2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp2(
+; CHECK-NEXT:    ret i32 [[Z:%.*]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %y
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_alt(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_alt(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_alt2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_alt2(
+; CHECK-NEXT:    ret i32 [[Y:%.*]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %y, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_inv_alt(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_inv_alt(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %A = icmp ne i32 %z, %x
+  %B = icmp ne i32 %z, %y
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_or_inv_icmp_alt(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_inv_icmp_alt(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %A = icmp ne i32 %z, %x
+  %B = icmp ne i32 %z, %y
+  %C = or i1 %B, %A
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define <2 x i8> @select_or_icmp_alt_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_or_icmp_alt_vec(
+; CHECK-NEXT:    ret <2 x i8> [[X:%.*]]
+;
+  %A = icmp ne <2 x i8> %x, %z
+  %B = icmp ne <2 x i8> %y, %z
+  %C = or <2 x i1> %A, %B
+  %D = select <2 x i1> %C, <2 x i8> %x, <2 x i8> %z
+  ret <2 x i8> %D
+}
+
+define i32 @select_or_inv_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_inv_icmp(
+; CHECK-NEXT:    ret i32 [[Z:%.*]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %B , %A
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_inv(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_inv(
+; CHECK-NEXT:    ret i32 [[Z:%.*]]
+;
+  %A = icmp ne i32 %z, %x
+  %B = icmp ne i32 %z, %y
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
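+; Negative tests. NOTE: the four select_and_icmp_pred_bad_* functions share their names with tests in select-and-cmp.ll but use different predicate/logic-op combinations.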
+define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_1(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_2(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_3(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_pred_bad_4(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_bad_true_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_bad_true_val(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[K:%.*]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %k, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_bad_false_val(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[K:%.*]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %k
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_bad_op(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_bad_op(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[K:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %k, %z
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+
+define i32 @select_or_icmp_bad_op_2(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_bad_op_2(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %k
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %z, i32 %x
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_1(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_2(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_3(
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp eq i32 %x, %z
+  %B = icmp eq i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_4(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_4(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = and i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_5(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_5(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %k
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_true_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_true_val(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[K:%.*]], i32 [[Z]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %k, i32 %z
+  ret i32 %D
+}
+
+define i32 @select_or_icmp_alt_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_or_icmp_alt_bad_false_val(
+; CHECK-NEXT:    [[A:%.*]] = icmp ne i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT:    [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[C]], i32 [[X]], i32 [[K:%.*]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %A = icmp ne i32 %x, %z
+  %B = icmp ne i32 %y, %z
+  %C = or i1 %A, %B
+  %D = select i1 %C, i32 %x, i32 %k
+  ret i32 %D
+}
diff --git a/test/Transforms/InstSimplify/shift.ll b/test/Transforms/InstSimplify/shift.ll
index 7a09ef971514..cbffd371853b 100644
--- a/test/Transforms/InstSimplify/shift.ll
+++ b/test/Transforms/InstSimplify/shift.ll
@@ -175,41 +175,65 @@ define <2 x i8> @shl_by_sext_bool_vec(<2 x i1> %x, <2 x i8> %y) {
   ret <2 x i8> %r
 }
 
-define i32 @shl_or_shr(i32 %a, i32 %b) {
+define i64 @shl_or_shr(i32 %a, i32 %b) {
 ; CHECK-LABEL: @shl_or_shr(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP4]], 32
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; CHECK-NEXT:    ret i32 [[TMP6]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %tmp1 = zext i32 %a to i64
   %tmp2 = zext i32 %b to i64
   %tmp3 = shl nuw i64 %tmp1, 32
   %tmp4 = or i64 %tmp2, %tmp3
   %tmp5 = lshr i64 %tmp4, 32
-  %tmp6 = trunc i64 %tmp5 to i32
-  ret i32 %tmp6
+  ret i64 %tmp5  ; the i64 shift result is returned directly; the expected output is just the zext of %a
 }
 
-define i32 @shl_or_shr2(i32 %a, i32 %b) {
 ; Since shift count of shl is smaller than the size of %b, OR cannot be eliminated.
+define i64 @shl_or_shr2(i32 %a, i32 %b) {
 ; CHECK-LABEL: @shl_or_shr2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 31
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP4]], 31
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; CHECK-NEXT:    ret i32 [[TMP6]]
+; CHECK-NEXT:    ret i64 [[TMP5]]
 ;
   %tmp1 = zext i32 %a to i64
   %tmp2 = zext i32 %b to i64
   %tmp3 = shl nuw i64 %tmp1, 31
   %tmp4 = or i64 %tmp2, %tmp3
   %tmp5 = lshr i64 %tmp4, 31
-  %tmp6 = trunc i64 %tmp5 to i32
-  ret i32 %tmp6
+  ret i64 %tmp5  ; shift amount is 31 here, so the expected output still contains the or and both shifts
+}
+
+; Vector (<2 x i64>) version of @shl_or_shr: shl by 32, or, lshr by 32 is expected to reduce to the zext of %a.
+define <2 x i64> @shl_or_shr1v(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @shl_or_shr1v(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %tmp1 = zext <2 x i32> %a to <2 x i64>
+  %tmp2 = zext <2 x i32> %b to <2 x i64>
+  %tmp3 = shl nuw <2 x i64> %tmp1, <i64 32, i64 32>
+  %tmp4 = or <2 x i64> %tmp3, %tmp2  ; operands are in the opposite order from the scalar @shl_or_shr test
+  %tmp5 = lshr <2 x i64> %tmp4, <i64 32, i64 32>
+  ret <2 x i64> %tmp5
+}
+
+; Negative vector (<2 x i64>) test: with a shift amount of 31 the whole shl/or/lshr sequence is expected to remain.
+define <2 x i64> @shl_or_shr2v(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @shl_or_shr2v(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 31, i64 31>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i64> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr <2 x i64> [[TMP4]], <i64 31, i64 31>
+; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
+;
+  %tmp1 = zext <2 x i32> %a to <2 x i64>
+  %tmp2 = zext <2 x i32> %b to <2 x i64>
+  %tmp3 = shl nuw <2 x i64> %tmp1, <i64 31, i64 31>
+  %tmp4 = or <2 x i64> %tmp2, %tmp3
+  %tmp5 = lshr <2 x i64> %tmp4, <i64 31, i64 31>
+  ret <2 x i64> %tmp5
 }
diff --git a/test/Transforms/LCSSA/basictest.ll b/test/Transforms/LCSSA/basictest.ll
index 910e6f8f607d..7ca552039b63 100644
--- a/test/Transforms/LCSSA/basictest.ll
+++ b/test/Transforms/LCSSA/basictest.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -lcssa -S | FileCheck %s
 ; RUN: opt < %s -passes=lcssa -S | FileCheck %s
-; RUN: opt < %s -debugify -lcssa -S | FileCheck -check-prefix=CHECK2 %s
+; RUN: opt < %s -debugify -lcssa -S | FileCheck -check-prefix=DEBUGIFY %s
 
 define void @lcssa(i1 %S2) {
 ; CHECK-LABEL: @lcssa
@@ -19,9 +19,12 @@ post.if:		; preds = %if.false, %if.true
 	br i1 %S2, label %loop.exit, label %loop.interior
 loop.exit:		; preds = %post.if
 ; CHECK: %X3.lcssa = phi i32
-; CHECK2: call void @llvm.dbg.value(metadata i32 %X3.lcssa
+; DEBUGIFY: %X3.lcssa = phi i32 {{.*}}, !dbg ![[DbgLoc:[0-9]+]]
+; DEBUGIFY-NEXT: call void @llvm.dbg.value(metadata i32 %X3.lcssa
 ; CHECK: %X4 = add i32 3, %X3.lcssa
 	%X4 = add i32 3, %X3		; <i32> [#uses=0]
 	ret void
 }
 
+; Make sure the lcssa phi has %X3's debug location
+; DEBUGIFY: ![[DbgLoc]] = !DILocation(line: 7
diff --git a/test/Transforms/SCCP/preserve-analysis.ll b/test/Transforms/SCCP/preserve-analysis.ll
index 52d2941d81ba..8d34e7195b95 100644
--- a/test/Transforms/SCCP/preserve-analysis.ll
+++ b/test/Transforms/SCCP/preserve-analysis.ll
@@ -7,11 +7,9 @@
 ; CHECK: Globals Alias Analysis
 ; CHECK: Dominator Tree Construction
 ; CHECK: Natural Loop Information
-; CHECK: Basic Alias Analysis (stateless AA impl)
 ; CHECK: Sparse Conditional Constant Propagation
 ; CHECK-NOT: Dominator Tree Construction
 ; CHECK-NOT: Natural Loop Information
-; CHECK-NOT: Basic Alias Analysis (stateless AA impl)
 ; CHECK-NOT: Globals Alias Analysis
 ; CHECK: Loop Vectorization
 
diff --git a/test/Transforms/SLPVectorizer/AArch64/PR38339.ll b/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
new file mode 100644
index 000000000000..1ab4a13260ed
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S -mtriple=aarch64-apple-ios -mcpu=cyclone -o - %s | FileCheck %s
+
+define void @f1(<2 x i16> %x, i16* %a) {
+; CHECK-LABEL: @f1(
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[X:%.*]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+; CHECK-NEXT:    [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
+; CHECK-NEXT:    store i16 [[TMP1]], i16* [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
+; CHECK-NEXT:    store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP2]], align 2
+; CHECK-NEXT:    ret void
+;
+  %t2 = extractelement <2 x i16> %x, i32 0
+  %t3 = extractelement <2 x i16> %x, i32 1
+  %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+  %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+  %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+  %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+  store i16 %t2, i16* %a  ; %t2 is also stored through %a; the expected output keeps this as a scalar store fed by an extractelement of the shuffle
+  store i16 %t2, i16* %ptr0  ; slots 0..3 receive t2, t3, t3, t2, i.e. lanes 0, 1, 1, 0 of %x, matching the expected shuffle mask
+  store i16 %t3, i16* %ptr1
+  store i16 %t3, i16* %ptr2
+  store i16 %t2, i16* %ptr3  ; the four slot stores are expected to become one <4 x i16> store through %ptr0
+  ret void
+}
diff --git a/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/test/Transforms/SimplifyCFG/merge-cond-stores.ll
index f730ef2aed3c..613eedde96bb 100644
--- a/test/Transforms/SimplifyCFG/merge-cond-stores.ll
+++ b/test/Transforms/SimplifyCFG/merge-cond-stores.ll
@@ -5,15 +5,15 @@
 define void @test_simple(i32* %p, i32 %a, i32 %b) {
 ; CHECK-LABEL: @test_simple(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[X1:%.*]] = icmp ne i32 [[A:%.*]], 0
 ; CHECK-NEXT:    [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i1 [[X2]], true
-; CHECK-NEXT:    [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[X2]], true
+; CHECK-NEXT:    [[TMP1:%.*]] = or i1 [[X1]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
 ; CHECK:         [[NOT_X2:%.*]] = xor i1 [[X2]], true
-; CHECK-NEXT:    [[DOT:%.*]] = zext i1 [[NOT_X2]] to i32
-; CHECK-NEXT:    store i32 [[DOT]], i32* [[P:%.*]], align 4
-; CHECK-NEXT:    br label [[TMP4]]
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = zext i1 [[NOT_X2]] to i32
+; CHECK-NEXT:    store i32 [[SPEC_SELECT]], i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[TMP3]]
 ; CHECK:         ret void
 ;
 entry:
@@ -44,8 +44,8 @@ define void @test_simple_commuted(i32* %p, i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = or i1 [[X1]], [[X2]]
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[TMP1:%.*]], label [[TMP2:%.*]]
-; CHECK:         [[DOT:%.*]] = zext i1 [[X2]] to i32
-; CHECK-NEXT:    store i32 [[DOT]], i32* [[P:%.*]], align 4
+; CHECK:         [[SPEC_SELECT:%.*]] = zext i1 [[X2]] to i32
+; CHECK-NEXT:    store i32 [[SPEC_SELECT]], i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br label [[TMP2]]
 ; CHECK:         ret void
 ;
@@ -76,16 +76,16 @@ define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[X4:%.*]] = icmp eq i32 [[D:%.*]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[TMP0]], [[C:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i1 [[X4]], true
-; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[X4]], true
+; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]]
 ; CHECK:         [[X3:%.*]] = icmp eq i32 [[C]], 0
-; CHECK-NEXT:    [[NOT_X2:%.*]] = icmp ne i32 [[B]], 0
-; CHECK-NEXT:    [[DOT:%.*]] = zext i1 [[NOT_X2]] to i32
-; CHECK-NEXT:    [[DOT_:%.*]] = select i1 [[X3]], i32 [[DOT]], i32 2
-; CHECK-NEXT:    [[DOT__:%.*]] = select i1 [[X4]], i32 [[DOT_]], i32 3
-; CHECK-NEXT:    store i32 [[DOT__]], i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[X2:%.*]] = icmp ne i32 [[B]], 0
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = zext i1 [[X2]] to i32
+; CHECK-NEXT:    [[SPEC_SELECT1:%.*]] = select i1 [[X3]], i32 [[SPEC_SELECT]], i32 2
+; CHECK-NEXT:    [[SPEC_SELECT2:%.*]] = select i1 [[X4]], i32 [[SPEC_SELECT1]], i32 3
+; CHECK-NEXT:    store i32 [[SPEC_SELECT2]], i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br label [[TMP6]]
 ; CHECK:         ret void
 ;
@@ -265,8 +265,7 @@ define i32 @test_diamond_simple(i32* %p, i32* %q, i32 %a, i32 %b) {
 ; CHECK-LABEL: @test_diamond_simple(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X1:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT:    [[Z1:%.*]] = add i32 [[A]], [[B:%.*]]
-; CHECK-NEXT:    [[Z2:%.*]] = select i1 [[X1]], i32 [[Z1]], i32 0
+; CHECK-NEXT:    [[Z2:%.*]] = select i1 [[X1]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    [[X2:%.*]] = icmp eq i32 [[B]], 0
 ; CHECK-NEXT:    [[Z3:%.*]] = sub i32 [[Z2]], [[B]]
 ; CHECK-NEXT:    [[Z4:%.*]] = select i1 [[X2]], i32 [[Z3]], i32 3