aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Support/KnownBits.h2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp17
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp39
-rw-r--r--lib/LTO/LTOModule.cpp24
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp52
-rw-r--r--lib/Target/X86/X86InstrAVX512.td16
-rw-r--r--lib/Target/X86/X86WinEHState.cpp6
-rw-r--r--test/CodeGen/PowerPC/combine_loads_from_build_pair.ll2
-rw-r--r--test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll15
-rw-r--r--test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll11
-rw-r--r--test/CodeGen/X86/bitcast-int-to-vector-bool.ll11
-rw-r--r--test/CodeGen/X86/setcc-wide-types.ll252
-rw-r--r--test/CodeGen/X86/win32-eh-available-externally.ll28
13 files changed, 387 insertions, 88 deletions
diff --git a/include/llvm/Support/KnownBits.h b/include/llvm/Support/KnownBits.h
index 7a4de3e5ff12..97e73b13fca3 100644
--- a/include/llvm/Support/KnownBits.h
+++ b/include/llvm/Support/KnownBits.h
@@ -100,13 +100,11 @@ public:
/// Make this value negative.
void makeNegative() {
- assert(!isNonNegative() && "Can't make a non-negative value negative");
One.setSignBit();
}
/// Make this value non-negative.
void makeNonNegative() {
- assert(!isNegative() && "Can't make a negative value non-negative");
Zero.setSignBit();
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 74970ab5792c..7643790df350 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -49,6 +49,8 @@
using namespace llvm;
+#define DEBUG_TYPE "legalizevectorops"
+
namespace {
class VectorLegalizer {
@@ -226,7 +228,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
- if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD)
+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
+ DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG));
switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
LD->getMemoryVT())) {
default: llvm_unreachable("This action is not supported yet!");
@@ -252,11 +255,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Changed = true;
return LegalizeOp(ExpandLoad(Op));
}
+ }
} else if (Op.getOpcode() == ISD::STORE) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT StVT = ST->getMemoryVT();
MVT ValVT = ST->getValue().getSimpleValueType();
- if (StVT.isVector() && ST->isTruncatingStore())
+ if (StVT.isVector() && ST->isTruncatingStore()) {
+ DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
+ Node->dump(&DAG));
switch (TLI.getTruncStoreAction(ValVT, StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
@@ -270,6 +276,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Changed = true;
return LegalizeOp(ExpandStore(Op));
}
+ }
} else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
HasVectorValue = true;
@@ -376,6 +383,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
break;
}
+ DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
+
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Promote:
@@ -383,12 +392,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Changed = true;
break;
case TargetLowering::Legal:
+ DEBUG(dbgs() << "Legal node: nothing to do\n");
break;
case TargetLowering::Custom: {
+ DEBUG(dbgs() << "Trying custom legalization\n");
if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
+ DEBUG(dbgs() << "Successfully custom legalized node\n");
Result = Tmp1;
break;
}
+ DEBUG(dbgs() << "Could not custom legalize node\n");
LLVM_FALLTHROUGH;
}
case TargetLowering::Expand:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a04c770c51c4..4c8b63d2f239 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5943,7 +5943,9 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
@@ -6043,7 +6045,9 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
@@ -6108,7 +6112,9 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
@@ -6134,7 +6140,9 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
@@ -6160,7 +6168,9 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
@@ -6189,7 +6199,9 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
@@ -6224,7 +6236,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
@@ -6256,7 +6270,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
@@ -7112,6 +7128,8 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
void SelectionDAG::salvageDebugInfo(SDNode &N) {
if (!N.getHasDebugValue())
return;
+
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
for (auto DV : GetDbgValues(&N)) {
if (DV->isInvalidated())
continue;
@@ -7135,13 +7153,16 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
SDDbgValue *Clone =
getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),
DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
+ ClonedDVs.push_back(Clone);
DV->setIsInvalidated();
- AddDbgValue(Clone, N0.getNode(), false);
DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
dbgs() << " into " << *DIExpr << '\n');
}
}
}
+
+ for (SDDbgValue *Dbg : ClonedDVs)
+ AddDbgValue(Dbg, Dbg->getSDNode(), false);
}
namespace {
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index 51b4f225939f..626d2f5dc813 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -388,24 +388,20 @@ void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) {
// from the ObjC data structures generated by the front end.
// special case if this data blob is an ObjC class definition
- std::string Section = v->getSection();
- if (Section.compare(0, 15, "__OBJC,__class,") == 0) {
- if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
- addObjCClass(gv);
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(v)) {
+ StringRef Section = GV->getSection();
+ if (Section.startswith("__OBJC,__class,")) {
+ addObjCClass(GV);
}
- }
- // special case if this data blob is an ObjC category definition
- else if (Section.compare(0, 18, "__OBJC,__category,") == 0) {
- if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
- addObjCCategory(gv);
+ // special case if this data blob is an ObjC category definition
+ else if (Section.startswith("__OBJC,__category,")) {
+ addObjCCategory(GV);
}
- }
- // special case if this data blob is the list of referenced classes
- else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) {
- if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
- addObjCClassRef(gv);
+ // special case if this data blob is the list of referenced classes
+ else if (Section.startswith("__OBJC,__cls_refs,")) {
+ addObjCClassRef(GV);
}
}
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ba3b02e25a9d..9edd799779c7 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16281,7 +16281,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op,
// Truncate if we had to extend i16/i8 above.
if (VT != ExtVT) {
WideVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
- SelectedVal = DAG.getNode(X86ISD::VTRUNC, DL, WideVT, SelectedVal);
+ SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal);
}
// Extract back to 128/256-bit if we widened.
@@ -18426,7 +18426,7 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
// Truncate if we had to extend i16/i8 above.
if (VT != ExtVT) {
WideVT = MVT::getVectorVT(VTElt, NumElts);
- V = DAG.getNode(X86ISD::VTRUNC, dl, WideVT, V);
+ V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V);
}
// Extract back to 128/256-bit if we widened.
@@ -18679,6 +18679,14 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
+ if (Subtarget.hasVLX()) {
+ // Extract to v4i1/v2i1.
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Load,
+ DAG.getIntPtrConstant(0, dl));
+ // Finally, do a normal extend (per ExtOpcode) to the desired register.
+ return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract);
+ }
+
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load);
@@ -18698,22 +18706,25 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
if (NumElts <= 8) {
// A subset, assume that we have only AVX-512F
- unsigned NumBitsToLoad = 8;
- MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad);
- SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(),
+ SDValue Load = DAG.getLoad(MVT::i8, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getMemOperand());
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
- MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad);
- SDValue BitVec = DAG.getBitcast(MaskVT, Load);
+ SDValue BitVec = DAG.getBitcast(MVT::v8i1, Load);
if (NumElts == 8)
return DAG.getNode(ExtOpcode, dl, VT, BitVec);
- // we should take care to v4i1 and v2i1
+ if (Subtarget.hasVLX()) {
+ // Extract to v4i1/v2i1.
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, BitVec,
+ DAG.getIntPtrConstant(0, dl));
+ // Finally, do a normal extend (per ExtOpcode) to the desired register.
+ return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract);
+ }
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec);
@@ -18728,13 +18739,12 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
Ld->getBasePtr(),
Ld->getMemOperand());
- SDValue BasePtrHi =
- DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(2, dl, BasePtr.getValueType()));
+ SDValue BasePtrHi = DAG.getMemBasePlusOffset(BasePtr, 2, dl);
- SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(),
- BasePtrHi,
- Ld->getMemOperand());
+ SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), BasePtrHi,
+ Ld->getPointerInfo().getWithOffset(2),
+ MinAlign(Ld->getAlignment(), 2U),
+ Ld->getMemOperand()->getFlags());
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
LoadLo.getValue(1), LoadHi.getValue(1));
@@ -34051,15 +34061,14 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl);
SDValue Load2 =
- DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
- std::min(16U, Alignment), Ld->getMemOperand()->getFlags());
+ DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo().getWithOffset(16),
+ MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags());
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Load1.getValue(1),
Load2.getValue(1));
- SDValue NewVec = DAG.getUNDEF(RegVT);
- NewVec = insert128BitVector(NewVec, Load1, 0, DAG, dl);
- NewVec = insert128BitVector(NewVec, Load2, NumElems / 2, DAG, dl);
+ SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2);
return DCI.CombineTo(N, NewVec, TF, true);
}
@@ -34465,8 +34474,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(),
Alignment, St->getMemOperand()->getFlags());
SDValue Ch1 =
- DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(),
- std::min(16U, Alignment), St->getMemOperand()->getFlags());
+ DAG.getStore(St->getChain(), dl, Value1, Ptr1,
+ St->getPointerInfo().getWithOffset(16),
+ MinAlign(Alignment, 16U), St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 46c19f18f8d3..dcd84930741b 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -8704,17 +8704,6 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
}
-// Use 512bit version to implement 128/256 bit in case NoVLX.
-multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info,
- X86VectorVTInfo _> {
-
- def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))),
- (X86Info.VT (EXTRACT_SUBREG
- (_.VT (!cast<Instruction>(NAME#"Zrr")
- (_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))),
- X86Info.SubRegIdx))>;
-}
-
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
@@ -8724,11 +8713,6 @@ let Predicates = [prd] in
defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
}
-let Predicates = [prd, NoVLX] in {
- defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>;
- defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>;
- }
-
}
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index 0472a85f50da..6d6dedc60736 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -149,6 +149,12 @@ void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool WinEHStatePass::runOnFunction(Function &F) {
+ // Don't insert state stores or exception handler thunks for
+ // available_externally functions. The handler needs to reference the LSDA,
+ // which will not be emitted in this case.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
// Check the personality. Do nothing if this personality doesn't use funclets.
if (!F.hasPersonalityFn())
return false;
diff --git a/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll b/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll
index 0f8f18a17879..45cc740d1eae 100644
--- a/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll
+++ b/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll
@@ -12,6 +12,8 @@ define i64 @func1(i64 %p1, i64 %p2, i64 %p3, i64 %p4, { i64, i8* } %struct) {
; CHECK-DAG: [[LOBITS:t[0-9]+]]: i32,ch = load<LD4[FixedStack-2]>
; CHECK-DAG: [[HIBITS:t[0-9]+]]: i32,ch = load<LD4[FixedStack-1]>
; CHECK: Combining: t{{[0-9]+}}: i64 = build_pair [[LOBITS]], [[HIBITS]]
+; CHECK-NEXT: Creating new node
+; CHECK-SAME: load<LD8[FixedStack-1]
; CHECK-NEXT: into
; CHECK-SAME: load<LD8[FixedStack-1]
; CHECK-LABEL: Optimized lowered selection DAG:
diff --git a/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index dcddb8e82642..6ef2be99dee5 100644
--- a/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -48,9 +48,8 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
-; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
-; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = bitcast i2 %a0 to <2 x i1>
%2 = sext <2 x i1> %1 to <2 x i64>
@@ -91,10 +90,8 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) {
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = bitcast i4 %a0 to <4 x i1>
%2 = sext <4 x i1> %1 to <4 x i32>
@@ -246,8 +243,8 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
-; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = bitcast i4 %a0 to <4 x i1>
%2 = sext <4 x i1> %1 to <4 x i64>
diff --git a/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index f88b540323cb..9e77cd11449e 100644
--- a/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -63,9 +63,7 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {
; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512VLBW-NEXT: kmovd %eax, %k1
-; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
-; AVX512VLBW-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
-; AVX512VLBW-NEXT: vzeroupper
+; AVX512VLBW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
%1 = bitcast i2 %a0 to <2 x i1>
%2 = zext <2 x i1> %1 to <2 x i64>
@@ -120,9 +118,7 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) {
; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512VLBW-NEXT: kmovd %eax, %k1
-; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
-; AVX512VLBW-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512VLBW-NEXT: vzeroupper
+; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
%1 = bitcast i4 %a0 to <4 x i1>
%2 = zext <4 x i1> %1 to <4 x i32>
@@ -317,8 +313,7 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512VLBW-NEXT: kmovd %eax, %k1
-; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
-; AVX512VLBW-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
+; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
%1 = bitcast i4 %a0 to <4 x i1>
%2 = zext <4 x i1> %1 to <4 x i64>
diff --git a/test/CodeGen/X86/bitcast-int-to-vector-bool.ll b/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
index 6d9f832d861f..45a48fae146d 100644
--- a/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
+++ b/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
@@ -46,9 +46,8 @@ define <2 x i1> @bitcast_i2_2i1(i2 zeroext %a0) {
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
-; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
-; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = bitcast i2 %a0 to <2 x i1>
ret <2 x i1> %1
@@ -90,10 +89,8 @@ define <4 x i1> @bitcast_i4_4i1(i4 zeroext %a0) {
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = bitcast i4 %a0 to <4 x i1>
ret <4 x i1> %1
diff --git a/test/CodeGen/X86/setcc-wide-types.ll b/test/CodeGen/X86/setcc-wide-types.ll
index f935db72dcb9..410378ffbad2 100644
--- a/test/CodeGen/X86/setcc-wide-types.ll
+++ b/test/CodeGen/X86/setcc-wide-types.ll
@@ -138,3 +138,255 @@ define i32 @eq_i256(<4 x i64> %x, <4 x i64> %y) {
ret i32 %zext
}
+; This test models the expansion of 'memcmp(a, b, 32) != 0'
+; if we allowed 2 pairs of 16-byte loads per block.
+
+define i32 @ne_i128_pair(i128* %a, i128* %b) {
+; SSE2-LABEL: ne_i128_pair:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movq (%rdi), %rax
+; SSE2-NEXT: movq 8(%rdi), %rcx
+; SSE2-NEXT: xorq (%rsi), %rax
+; SSE2-NEXT: xorq 8(%rsi), %rcx
+; SSE2-NEXT: movq 24(%rdi), %rdx
+; SSE2-NEXT: movq 16(%rdi), %rdi
+; SSE2-NEXT: xorq 16(%rsi), %rdi
+; SSE2-NEXT: orq %rax, %rdi
+; SSE2-NEXT: xorq 24(%rsi), %rdx
+; SSE2-NEXT: orq %rcx, %rdx
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: orq %rdi, %rdx
+; SSE2-NEXT: setne %al
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: ne_i128_pair:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movq (%rdi), %rax
+; AVX2-NEXT: movq 8(%rdi), %rcx
+; AVX2-NEXT: xorq (%rsi), %rax
+; AVX2-NEXT: xorq 8(%rsi), %rcx
+; AVX2-NEXT: movq 24(%rdi), %rdx
+; AVX2-NEXT: movq 16(%rdi), %rdi
+; AVX2-NEXT: xorq 16(%rsi), %rdi
+; AVX2-NEXT: orq %rax, %rdi
+; AVX2-NEXT: xorq 24(%rsi), %rdx
+; AVX2-NEXT: orq %rcx, %rdx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: orq %rdi, %rdx
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: retq
+ %a0 = load i128, i128* %a
+ %b0 = load i128, i128* %b
+ %xor1 = xor i128 %a0, %b0
+ %ap1 = getelementptr i128, i128* %a, i128 1
+ %bp1 = getelementptr i128, i128* %b, i128 1
+ %a1 = load i128, i128* %ap1
+ %b1 = load i128, i128* %bp1
+ %xor2 = xor i128 %a1, %b1
+ %or = or i128 %xor1, %xor2
+ %cmp = icmp ne i128 %or, 0
+ %z = zext i1 %cmp to i32
+ ret i32 %z
+}
+
+; This test models the expansion of 'memcmp(a, b, 32) == 0'
+; if we allowed 2 pairs of 16-byte loads per block.
+
+define i32 @eq_i128_pair(i128* %a, i128* %b) {
+; SSE2-LABEL: eq_i128_pair:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movq (%rdi), %rax
+; SSE2-NEXT: movq 8(%rdi), %rcx
+; SSE2-NEXT: xorq (%rsi), %rax
+; SSE2-NEXT: xorq 8(%rsi), %rcx
+; SSE2-NEXT: movq 24(%rdi), %rdx
+; SSE2-NEXT: movq 16(%rdi), %rdi
+; SSE2-NEXT: xorq 16(%rsi), %rdi
+; SSE2-NEXT: orq %rax, %rdi
+; SSE2-NEXT: xorq 24(%rsi), %rdx
+; SSE2-NEXT: orq %rcx, %rdx
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: orq %rdi, %rdx
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: eq_i128_pair:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movq (%rdi), %rax
+; AVX2-NEXT: movq 8(%rdi), %rcx
+; AVX2-NEXT: xorq (%rsi), %rax
+; AVX2-NEXT: xorq 8(%rsi), %rcx
+; AVX2-NEXT: movq 24(%rdi), %rdx
+; AVX2-NEXT: movq 16(%rdi), %rdi
+; AVX2-NEXT: xorq 16(%rsi), %rdi
+; AVX2-NEXT: orq %rax, %rdi
+; AVX2-NEXT: xorq 24(%rsi), %rdx
+; AVX2-NEXT: orq %rcx, %rdx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: orq %rdi, %rdx
+; AVX2-NEXT: sete %al
+; AVX2-NEXT: retq
+ %a0 = load i128, i128* %a
+ %b0 = load i128, i128* %b
+ %xor1 = xor i128 %a0, %b0
+ %ap1 = getelementptr i128, i128* %a, i128 1
+ %bp1 = getelementptr i128, i128* %b, i128 1
+ %a1 = load i128, i128* %ap1
+ %b1 = load i128, i128* %bp1
+ %xor2 = xor i128 %a1, %b1
+ %or = or i128 %xor1, %xor2
+ %cmp = icmp eq i128 %or, 0
+ %z = zext i1 %cmp to i32
+ ret i32 %z
+}
+
+; This test models the expansion of 'memcmp(a, b, 64) != 0'
+; if we allowed 2 pairs of 32-byte loads per block.
+
+define i32 @ne_i256_pair(i256* %a, i256* %b) {
+; SSE2-LABEL: ne_i256_pair:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movq 16(%rdi), %r9
+; SSE2-NEXT: movq 24(%rdi), %r11
+; SSE2-NEXT: movq (%rdi), %r8
+; SSE2-NEXT: movq 8(%rdi), %r10
+; SSE2-NEXT: xorq 8(%rsi), %r10
+; SSE2-NEXT: xorq 24(%rsi), %r11
+; SSE2-NEXT: xorq (%rsi), %r8
+; SSE2-NEXT: xorq 16(%rsi), %r9
+; SSE2-NEXT: movq 48(%rdi), %rdx
+; SSE2-NEXT: movq 32(%rdi), %rax
+; SSE2-NEXT: movq 56(%rdi), %rcx
+; SSE2-NEXT: movq 40(%rdi), %rdi
+; SSE2-NEXT: xorq 40(%rsi), %rdi
+; SSE2-NEXT: xorq 56(%rsi), %rcx
+; SSE2-NEXT: orq %r11, %rcx
+; SSE2-NEXT: orq %rdi, %rcx
+; SSE2-NEXT: orq %r10, %rcx
+; SSE2-NEXT: xorq 32(%rsi), %rax
+; SSE2-NEXT: xorq 48(%rsi), %rdx
+; SSE2-NEXT: orq %r9, %rdx
+; SSE2-NEXT: orq %rax, %rdx
+; SSE2-NEXT: orq %r8, %rdx
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: orq %rcx, %rdx
+; SSE2-NEXT: setne %al
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: ne_i256_pair:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movq 16(%rdi), %r9
+; AVX2-NEXT: movq 24(%rdi), %r11
+; AVX2-NEXT: movq (%rdi), %r8
+; AVX2-NEXT: movq 8(%rdi), %r10
+; AVX2-NEXT: xorq 8(%rsi), %r10
+; AVX2-NEXT: xorq 24(%rsi), %r11
+; AVX2-NEXT: xorq (%rsi), %r8
+; AVX2-NEXT: xorq 16(%rsi), %r9
+; AVX2-NEXT: movq 48(%rdi), %rdx
+; AVX2-NEXT: movq 32(%rdi), %rax
+; AVX2-NEXT: movq 56(%rdi), %rcx
+; AVX2-NEXT: movq 40(%rdi), %rdi
+; AVX2-NEXT: xorq 40(%rsi), %rdi
+; AVX2-NEXT: xorq 56(%rsi), %rcx
+; AVX2-NEXT: orq %r11, %rcx
+; AVX2-NEXT: orq %rdi, %rcx
+; AVX2-NEXT: orq %r10, %rcx
+; AVX2-NEXT: xorq 32(%rsi), %rax
+; AVX2-NEXT: xorq 48(%rsi), %rdx
+; AVX2-NEXT: orq %r9, %rdx
+; AVX2-NEXT: orq %rax, %rdx
+; AVX2-NEXT: orq %r8, %rdx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: orq %rcx, %rdx
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: retq
+ %a0 = load i256, i256* %a
+ %b0 = load i256, i256* %b
+ %xor1 = xor i256 %a0, %b0
+ %ap1 = getelementptr i256, i256* %a, i256 1
+ %bp1 = getelementptr i256, i256* %b, i256 1
+ %a1 = load i256, i256* %ap1
+ %b1 = load i256, i256* %bp1
+ %xor2 = xor i256 %a1, %b1
+ %or = or i256 %xor1, %xor2
+ %cmp = icmp ne i256 %or, 0
+ %z = zext i1 %cmp to i32
+ ret i32 %z
+}
+
+; This test models the expansion of 'memcmp(a, b, 64) == 0'
+; if we allowed 2 pairs of 32-byte loads per block.
+
+define i32 @eq_i256_pair(i256* %a, i256* %b) {
+; SSE2-LABEL: eq_i256_pair:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movq 16(%rdi), %r9
+; SSE2-NEXT: movq 24(%rdi), %r11
+; SSE2-NEXT: movq (%rdi), %r8
+; SSE2-NEXT: movq 8(%rdi), %r10
+; SSE2-NEXT: xorq 8(%rsi), %r10
+; SSE2-NEXT: xorq 24(%rsi), %r11
+; SSE2-NEXT: xorq (%rsi), %r8
+; SSE2-NEXT: xorq 16(%rsi), %r9
+; SSE2-NEXT: movq 48(%rdi), %rdx
+; SSE2-NEXT: movq 32(%rdi), %rax
+; SSE2-NEXT: movq 56(%rdi), %rcx
+; SSE2-NEXT: movq 40(%rdi), %rdi
+; SSE2-NEXT: xorq 40(%rsi), %rdi
+; SSE2-NEXT: xorq 56(%rsi), %rcx
+; SSE2-NEXT: orq %r11, %rcx
+; SSE2-NEXT: orq %rdi, %rcx
+; SSE2-NEXT: orq %r10, %rcx
+; SSE2-NEXT: xorq 32(%rsi), %rax
+; SSE2-NEXT: xorq 48(%rsi), %rdx
+; SSE2-NEXT: orq %r9, %rdx
+; SSE2-NEXT: orq %rax, %rdx
+; SSE2-NEXT: orq %r8, %rdx
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: orq %rcx, %rdx
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: eq_i256_pair:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movq 16(%rdi), %r9
+; AVX2-NEXT: movq 24(%rdi), %r11
+; AVX2-NEXT: movq (%rdi), %r8
+; AVX2-NEXT: movq 8(%rdi), %r10
+; AVX2-NEXT: xorq 8(%rsi), %r10
+; AVX2-NEXT: xorq 24(%rsi), %r11
+; AVX2-NEXT: xorq (%rsi), %r8
+; AVX2-NEXT: xorq 16(%rsi), %r9
+; AVX2-NEXT: movq 48(%rdi), %rdx
+; AVX2-NEXT: movq 32(%rdi), %rax
+; AVX2-NEXT: movq 56(%rdi), %rcx
+; AVX2-NEXT: movq 40(%rdi), %rdi
+; AVX2-NEXT: xorq 40(%rsi), %rdi
+; AVX2-NEXT: xorq 56(%rsi), %rcx
+; AVX2-NEXT: orq %r11, %rcx
+; AVX2-NEXT: orq %rdi, %rcx
+; AVX2-NEXT: orq %r10, %rcx
+; AVX2-NEXT: xorq 32(%rsi), %rax
+; AVX2-NEXT: xorq 48(%rsi), %rdx
+; AVX2-NEXT: orq %r9, %rdx
+; AVX2-NEXT: orq %rax, %rdx
+; AVX2-NEXT: orq %r8, %rdx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: orq %rcx, %rdx
+; AVX2-NEXT: sete %al
+; AVX2-NEXT: retq
+ %a0 = load i256, i256* %a
+ %b0 = load i256, i256* %b
+ %xor1 = xor i256 %a0, %b0
+ %ap1 = getelementptr i256, i256* %a, i256 1
+ %bp1 = getelementptr i256, i256* %b, i256 1
+ %a1 = load i256, i256* %ap1
+ %b1 = load i256, i256* %bp1
+ %xor2 = xor i256 %a1, %b1
+ %or = or i256 %xor1, %xor2
+ %cmp = icmp eq i256 %or, 0
+ %z = zext i1 %cmp to i32
+ ret i32 %z
+}
+
diff --git a/test/CodeGen/X86/win32-eh-available-externally.ll b/test/CodeGen/X86/win32-eh-available-externally.ll
new file mode 100644
index 000000000000..49da191de978
--- /dev/null
+++ b/test/CodeGen/X86/win32-eh-available-externally.ll
@@ -0,0 +1,28 @@
+; RUN: opt -S -x86-winehstate < %s | FileCheck %s --check-prefix=IR
+; RUN: llc < %s | FileCheck %s --check-prefix=ASM
+
+; IR-NOT: define.*__ehhandler
+; IR: define available_externally void @foo(void ()*)
+; IR-NOT: define.*__ehhandler
+
+; No code should be emitted.
+; ASM-NOT: __ehtable
+; ASM-NOT: __ehhandler
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+declare i32 @__CxxFrameHandler3(...) unnamed_addr
+
+define available_externally void @foo(void ()*) personality i32 (...)* @__CxxFrameHandler3 {
+start:
+ invoke void %0()
+ to label %good unwind label %bad
+
+good: ; preds = %start
+ ret void
+
+bad: ; preds = %start
+ %cleanuppad = cleanuppad within none []
+ cleanupret from %cleanuppad unwind to caller
+}