src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2017-12-28 23:57:18 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-12-28 23:57:18 +0000
commit	b8a2042aa938069e862750553db0e4d82d25822c (patch)
tree	8acb3313c9e21902b08a931a708ba405aa157b31
parent	b2b7c066a48f61ec67332fb797a20bb04901c83d (diff)
download	src-b8a2042aa938069e862750553db0e4d82d25822c.tar.gz src-b8a2042aa938069e862750553db0e4d82d25822c.zip

Vendor import of llvm trunk r321545 (tag: vendor/llvm/llvm-trunk-r321545):

https://llvm.org/svn/llvm-project/llvm/trunk@321545

Notes

Notes: svn path=/vendor/llvm/dist/; revision=327320

svn path=/vendor/llvm/llvm-trunk-r321545/; revision=327321; tag=vendor/llvm/llvm-trunk-r321545

Diffstat

-rw-r--r--

include/llvm/Support/KnownBits.h

-rw-r--r--

lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

-rw-r--r--

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

-rw-r--r--

lib/LTO/LTOModule.cpp

-rw-r--r--

lib/Target/X86/X86ISelLowering.cpp

-rw-r--r--

lib/Target/X86/X86InstrAVX512.td

-rw-r--r--

lib/Target/X86/X86WinEHState.cpp

-rw-r--r--

test/CodeGen/PowerPC/combine_loads_from_build_pair.ll

-rw-r--r--

test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll

-rw-r--r--

test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll

-rw-r--r--

test/CodeGen/X86/bitcast-int-to-vector-bool.ll

-rw-r--r--

test/CodeGen/X86/setcc-wide-types.ll | 252

-rw-r--r--

test/CodeGen/X86/win32-eh-available-externally.ll

13 files changed, 387 insertions, 88 deletions

diff --git a/include/llvm/Support/KnownBits.h b/include/llvm/Support/KnownBits.h
index 7a4de3e5ff12..97e73b13fca3 100644
--- a/include/llvm/Support/KnownBits.h
+++ b/include/llvm/Support/KnownBits.h

@@ -100,13 +100,11 @@ public:

/// Make this value negative.

void makeNegative() {

- assert(!isNonNegative() && "Can't make a non-negative value negative");

One.setSignBit();

}

/// Make this value non-negative.

void makeNonNegative() {

- assert(!isNegative() && "Can't make a negative value non-negative");

Zero.setSignBit();

}

diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 74970ab5792c..7643790df350 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

@@ -49,6 +49,8 @@

using namespace llvm;

+#define DEBUG_TYPE "legalizevectorops"

namespace {

class VectorLegalizer {

@@ -226,7 +228,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {

if (Op.getOpcode() == ISD::LOAD) {

LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());

ISD::LoadExtType ExtType = LD->getExtensionType();

- if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD)

+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {

+ DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG));

switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),

LD->getMemoryVT())) {

default: llvm_unreachable("This action is not supported yet!");

@@ -252,11 +255,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {

Changed = true;

return LegalizeOp(ExpandLoad(Op));

}

+ }

} else if (Op.getOpcode() == ISD::STORE) {

StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());

EVT StVT = ST->getMemoryVT();

MVT ValVT = ST->getValue().getSimpleValueType();

- if (StVT.isVector() && ST->isTruncatingStore())

+ if (StVT.isVector() && ST->isTruncatingStore()) {

+ DEBUG(dbgs() << "\nLegalizing truncating vector store: ";

+ Node->dump(&DAG));

switch (TLI.getTruncStoreAction(ValVT, StVT)) {

default: llvm_unreachable("This action is not supported yet!");

case TargetLowering::Legal:

@@ -270,6 +276,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {

Changed = true;

return LegalizeOp(ExpandStore(Op));

}

+ }

} else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)

HasVectorValue = true;

@@ -376,6 +383,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {

break;

}

+ DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));

switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {

default: llvm_unreachable("This action is not supported yet!");

case TargetLowering::Promote:

@@ -383,12 +392,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {

Changed = true;

break;

case TargetLowering::Legal:

+ DEBUG(dbgs() << "Legal node: nothing to do\n");

break;

case TargetLowering::Custom: {

+ DEBUG(dbgs() << "Trying custom legalization\n");

if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {

+ DEBUG(dbgs() << "Successfully custom legalized node\n");

Result = Tmp1;

break;

}

+ DEBUG(dbgs() << "Could not custom legalize node\n");

LLVM_FALLTHROUGH;

}

case TargetLowering::Expand:

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a04c770c51c4..4c8b63d2f239 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

@@ -5943,7 +5943,9 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,

CSEMap.InsertNode(N, IP);

InsertNode(N);

- return SDValue(N, 0);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

}

SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,

@@ -6043,7 +6045,9 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,

CSEMap.InsertNode(N, IP);

InsertNode(N);

- return SDValue(N, 0);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

}

SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,

@@ -6108,7 +6112,9 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,

CSEMap.InsertNode(N, IP);

InsertNode(N);

- return SDValue(N, 0);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

}

SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,

@@ -6134,7 +6140,9 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,

CSEMap.InsertNode(N, IP);

InsertNode(N);

- return SDValue(N, 0);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

}

SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,

@@ -6160,7 +6168,9 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,

CSEMap.InsertNode(N, IP);

InsertNode(N);

- return SDValue(N, 0);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

}

SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,

@@ -6189,7 +6199,9 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,

CSEMap.InsertNode(N, IP);

InsertNode(N);

- return SDValue(N, 0);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

}

SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,

@@ -6224,7 +6236,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,

CSEMap.InsertNode(N, IP);

InsertNode(N);

- return SDValue(N, 0);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

}

SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,

@@ -6256,7 +6270,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,

CSEMap.InsertNode(N, IP);

InsertNode(N);

- return SDValue(N, 0);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

}

SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,

@@ -7112,6 +7128,8 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,

void SelectionDAG::salvageDebugInfo(SDNode &N) {

if (!N.getHasDebugValue())

return;

+ SmallVector<SDDbgValue *, 2> ClonedDVs;

for (auto DV : GetDbgValues(&N)) {

if (DV->isInvalidated())

continue;

@@ -7135,13 +7153,16 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {

SDDbgValue *Clone =

getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),

DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());

+ ClonedDVs.push_back(Clone);

DV->setIsInvalidated();

- AddDbgValue(Clone, N0.getNode(), false);

DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);

dbgs() << " into " << *DIExpr << '\n');

}

+ for (SDDbgValue *Dbg : ClonedDVs)

+ AddDbgValue(Dbg, Dbg->getSDNode(), false);

}

namespace {

diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index 51b4f225939f..626d2f5dc813 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp

@@ -388,24 +388,20 @@ void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) {

// from the ObjC data structures generated by the front end.

// special case if this data blob is an ObjC class definition

- std::string Section = v->getSection();

- if (Section.compare(0, 15, "__OBJC,__class,") == 0) {

- if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {

- addObjCClass(gv);

+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(v)) {

+ StringRef Section = GV->getSection();

+ if (Section.startswith("__OBJC,__class,")) {

+ addObjCClass(GV);

}

- }

- // special case if this data blob is an ObjC category definition

- else if (Section.compare(0, 18, "__OBJC,__category,") == 0) {

- if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {

- addObjCCategory(gv);

+ // special case if this data blob is an ObjC category definition

+ else if (Section.startswith("__OBJC,__category,")) {

+ addObjCCategory(GV);

}

- }

- // special case if this data blob is the list of referenced classes

- else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) {

- if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {

- addObjCClassRef(gv);

+ // special case if this data blob is the list of referenced classes

+ else if (Section.startswith("__OBJC,__cls_refs,")) {

+ addObjCClassRef(GV);

}

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ba3b02e25a9d..9edd799779c7 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp

@@ -16281,7 +16281,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op,

// Truncate if we had to extend i16/i8 above.

if (VT != ExtVT) {

WideVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);

- SelectedVal = DAG.getNode(X86ISD::VTRUNC, DL, WideVT, SelectedVal);

+ SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal);

}

// Extract back to 128/256-bit if we widened.

@@ -18426,7 +18426,7 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,

// Truncate if we had to extend i16/i8 above.

if (VT != ExtVT) {

WideVT = MVT::getVectorVT(VTElt, NumElts);

- V = DAG.getNode(X86ISD::VTRUNC, dl, WideVT, V);

+ V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V);

}

// Extract back to 128/256-bit if we widened.

@@ -18679,6 +18679,14 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,

// Replace chain users with the new chain.

assert(Load->getNumValues() == 2 && "Loads must carry a chain!");

DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));

+ if (Subtarget.hasVLX()) {

+ // Extract to v4i1/v2i1.

+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Load,

+ DAG.getIntPtrConstant(0, dl));

+ // Finally, do a normal sign-extend to the desired register.

+ return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract);

+ }

MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);

SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load);

@@ -18698,22 +18706,25 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,

if (NumElts <= 8) {

// A subset, assume that we have only AVX-512F

- unsigned NumBitsToLoad = 8;

- MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad);

- SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(),

+ SDValue Load = DAG.getLoad(MVT::i8, dl, Ld->getChain(),

Ld->getBasePtr(),

Ld->getMemOperand());

// Replace chain users with the new chain.

assert(Load->getNumValues() == 2 && "Loads must carry a chain!");

DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));

- MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad);

- SDValue BitVec = DAG.getBitcast(MaskVT, Load);

+ SDValue BitVec = DAG.getBitcast(MVT::v8i1, Load);

if (NumElts == 8)

return DAG.getNode(ExtOpcode, dl, VT, BitVec);

- // we should take care to v4i1 and v2i1

+ if (Subtarget.hasVLX()) {

+ // Extract to v4i1/v2i1.

+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, BitVec,

+ DAG.getIntPtrConstant(0, dl));

+ // Finally, do a normal sign-extend to the desired register.

+ return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract);

+ }

MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);

SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec);

@@ -18728,13 +18739,12 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,

Ld->getBasePtr(),

Ld->getMemOperand());

- SDValue BasePtrHi =

- DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,

- DAG.getConstant(2, dl, BasePtr.getValueType()));

+ SDValue BasePtrHi = DAG.getMemBasePlusOffset(BasePtr, 2, dl);

- SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(),

- BasePtrHi,

- Ld->getMemOperand());

+ SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), BasePtrHi,

+ Ld->getPointerInfo().getWithOffset(2),

+ MinAlign(Ld->getAlignment(), 2U),

+ Ld->getMemOperand()->getFlags());

SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,

LoadLo.getValue(1), LoadHi.getValue(1));

@@ -34051,15 +34061,14 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,

Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl);

SDValue Load2 =

- DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),

- std::min(16U, Alignment), Ld->getMemOperand()->getFlags());

+ DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,

+ Ld->getPointerInfo().getWithOffset(16),

+ MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags());

SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,

Load1.getValue(1),

Load2.getValue(1));

- SDValue NewVec = DAG.getUNDEF(RegVT);

- NewVec = insert128BitVector(NewVec, Load1, 0, DAG, dl);

- NewVec = insert128BitVector(NewVec, Load2, NumElems / 2, DAG, dl);

+ SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2);

return DCI.CombineTo(N, NewVec, TF, true);

}

@@ -34465,8 +34474,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,

DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(),

Alignment, St->getMemOperand()->getFlags());

SDValue Ch1 =

- DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(),

- std::min(16U, Alignment), St->getMemOperand()->getFlags());

+ DAG.getStore(St->getChain(), dl, Value1, Ptr1,

+ St->getPointerInfo().getWithOffset(16),

+ MinAlign(Alignment, 16U), St->getMemOperand()->getFlags());

return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);

}

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 46c19f18f8d3..dcd84930741b 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td

@@ -8704,17 +8704,6 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),

IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;

}

-// Use 512bit version to implement 128/256 bit in case NoVLX.

-multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info,

- X86VectorVTInfo _> {

- def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))),

- (X86Info.VT (EXTRACT_SUBREG

- (_.VT (!cast<Instruction>(NAME#"Zrr")

- (_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))),

- X86Info.SubRegIdx))>;

-}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,

string OpcodeStr, Predicate prd> {

let Predicates = [prd] in

@@ -8724,11 +8713,6 @@ let Predicates = [prd] in

defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;

defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;

}

-let Predicates = [prd, NoVLX] in {

- defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>;

- defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>;

- }

}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;

diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index 0472a85f50da..6d6dedc60736 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp

@@ -149,6 +149,12 @@ void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const {

}

bool WinEHStatePass::runOnFunction(Function &F) {

+ // Don't insert state stores or exception handler thunks for

+ // available_externally functions. The handler needs to reference the LSDA,

+ // which will not be emitted in this case.

+ if (F.hasAvailableExternallyLinkage())

+ return false;

// Check the personality. Do nothing if this personality doesn't use funclets.

if (!F.hasPersonalityFn())

return false;

diff --git a/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll b/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll
index 0f8f18a17879..45cc740d1eae 100644
--- a/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll
+++ b/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll

@@ -12,6 +12,8 @@ define i64 @func1(i64 %p1, i64 %p2, i64 %p3, i64 %p4, { i64, i8* } %struct) {

; CHECK-DAG: [[LOBITS:t[0-9]+]]: i32,ch = load<LD4[FixedStack-2]>

; CHECK-DAG: [[HIBITS:t[0-9]+]]: i32,ch = load<LD4[FixedStack-1]>

; CHECK: Combining: t{{[0-9]+}}: i64 = build_pair [[LOBITS]], [[HIBITS]]

+; CHECK-NEXT: Creating new node

+; CHECK-SAME: load<LD8[FixedStack-1]

; CHECK-NEXT: into

; CHECK-SAME: load<LD8[FixedStack-1]

; CHECK-LABEL: Optimized lowered selection DAG:

diff --git a/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index dcddb8e82642..6ef2be99dee5 100644
--- a/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll

@@ -48,9 +48,8 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {

; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)

; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax

; AVX512-NEXT: kmovd %eax, %k1

-; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}

-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0

-; AVX512-NEXT: vzeroupper

+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0

+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}

; AVX512-NEXT: retq

%1 = bitcast i2 %a0 to <2 x i1>

%2 = sext <2 x i1> %1 to <2 x i64>

@@ -91,10 +90,8 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) {

; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)

; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax

; AVX512-NEXT: kmovd %eax, %k1

-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0

-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}

-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0

-; AVX512-NEXT: vzeroupper

+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0

+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}

; AVX512-NEXT: retq

%1 = bitcast i4 %a0 to <4 x i1>

%2 = sext <4 x i1> %1 to <4 x i32>

@@ -246,8 +243,8 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {

; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)

; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax

; AVX512-NEXT: kmovd %eax, %k1

-; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}

-; AVX512-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0

+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0

+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}

; AVX512-NEXT: retq

%1 = bitcast i4 %a0 to <4 x i1>

%2 = sext <4 x i1> %1 to <4 x i64>

diff --git a/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index f88b540323cb..9e77cd11449e 100644
--- a/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll

@@ -63,9 +63,7 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {

; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)

; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax

; AVX512VLBW-NEXT: kmovd %eax, %k1

-; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}

-; AVX512VLBW-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0

-; AVX512VLBW-NEXT: vzeroupper

+; AVX512VLBW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}

; AVX512VLBW-NEXT: retq

%1 = bitcast i2 %a0 to <2 x i1>

%2 = zext <2 x i1> %1 to <2 x i64>

@@ -120,9 +118,7 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) {

; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)

; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax

; AVX512VLBW-NEXT: kmovd %eax, %k1

-; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}

-; AVX512VLBW-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0

-; AVX512VLBW-NEXT: vzeroupper

+; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}

; AVX512VLBW-NEXT: retq

%1 = bitcast i4 %a0 to <4 x i1>

%2 = zext <4 x i1> %1 to <4 x i32>

@@ -317,8 +313,7 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {

; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)

; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax

; AVX512VLBW-NEXT: kmovd %eax, %k1

-; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}

-; AVX512VLBW-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0

+; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k1} {z}

; AVX512VLBW-NEXT: retq

%1 = bitcast i4 %a0 to <4 x i1>

%2 = zext <4 x i1> %1 to <4 x i64>

diff --git a/test/CodeGen/X86/bitcast-int-to-vector-bool.ll b/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
index 6d9f832d861f..45a48fae146d 100644
--- a/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
+++ b/test/CodeGen/X86/bitcast-int-to-vector-bool.ll

@@ -46,9 +46,8 @@ define <2 x i1> @bitcast_i2_2i1(i2 zeroext %a0) {

; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)

; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax

; AVX512-NEXT: kmovd %eax, %k1

-; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}

-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0

-; AVX512-NEXT: vzeroupper

+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0

+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}

; AVX512-NEXT: retq

%1 = bitcast i2 %a0 to <2 x i1>

ret <2 x i1> %1

@@ -90,10 +89,8 @@ define <4 x i1> @bitcast_i4_4i1(i4 zeroext %a0) {

; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)

; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax

; AVX512-NEXT: kmovd %eax, %k1

-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0

-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}

-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0

-; AVX512-NEXT: vzeroupper

+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0

+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}

; AVX512-NEXT: retq

%1 = bitcast i4 %a0 to <4 x i1>

ret <4 x i1> %1

diff --git a/test/CodeGen/X86/setcc-wide-types.ll b/test/CodeGen/X86/setcc-wide-types.ll
index f935db72dcb9..410378ffbad2 100644
--- a/test/CodeGen/X86/setcc-wide-types.ll
+++ b/test/CodeGen/X86/setcc-wide-types.ll

@@ -138,3 +138,255 @@ define i32 @eq_i256(<4 x i64> %x, <4 x i64> %y) {

ret i32 %zext

}

+; This test models the expansion of 'memcmp(a, b, 32) != 0'

+; if we allowed 2 pairs of 16-byte loads per block.

+define i32 @ne_i128_pair(i128* %a, i128* %b) {

+; SSE2-LABEL: ne_i128_pair:

+; SSE2: # %bb.0:

+; SSE2-NEXT: movq (%rdi), %rax

+; SSE2-NEXT: movq 8(%rdi), %rcx

+; SSE2-NEXT: xorq (%rsi), %rax

+; SSE2-NEXT: xorq 8(%rsi), %rcx

+; SSE2-NEXT: movq 24(%rdi), %rdx

+; SSE2-NEXT: movq 16(%rdi), %rdi

+; SSE2-NEXT: xorq 16(%rsi), %rdi

+; SSE2-NEXT: orq %rax, %rdi

+; SSE2-NEXT: xorq 24(%rsi), %rdx

+; SSE2-NEXT: orq %rcx, %rdx

+; SSE2-NEXT: xorl %eax, %eax

+; SSE2-NEXT: orq %rdi, %rdx

+; SSE2-NEXT: setne %al

+; SSE2-NEXT: retq

+; AVX2-LABEL: ne_i128_pair:

+; AVX2: # %bb.0:

+; AVX2-NEXT: movq (%rdi), %rax

+; AVX2-NEXT: movq 8(%rdi), %rcx

+; AVX2-NEXT: xorq (%rsi), %rax

+; AVX2-NEXT: xorq 8(%rsi), %rcx

+; AVX2-NEXT: movq 24(%rdi), %rdx

+; AVX2-NEXT: movq 16(%rdi), %rdi

+; AVX2-NEXT: xorq 16(%rsi), %rdi

+; AVX2-NEXT: orq %rax, %rdi

+; AVX2-NEXT: xorq 24(%rsi), %rdx

+; AVX2-NEXT: orq %rcx, %rdx

+; AVX2-NEXT: xorl %eax, %eax

+; AVX2-NEXT: orq %rdi, %rdx

+; AVX2-NEXT: setne %al

+; AVX2-NEXT: retq

+ %a0 = load i128, i128* %a

+ %b0 = load i128, i128* %b

+ %xor1 = xor i128 %a0, %b0

+ %ap1 = getelementptr i128, i128* %a, i128 1

+ %bp1 = getelementptr i128, i128* %b, i128 1

+ %a1 = load i128, i128* %ap1

+ %b1 = load i128, i128* %bp1

+ %xor2 = xor i128 %a1, %b1

+ %or = or i128 %xor1, %xor2

+ %cmp = icmp ne i128 %or, 0

+ %z = zext i1 %cmp to i32

+ ret i32 %z

+}

+; This test models the expansion of 'memcmp(a, b, 32) == 0'

+; if we allowed 2 pairs of 16-byte loads per block.

+define i32 @eq_i128_pair(i128* %a, i128* %b) {

+; SSE2-LABEL: eq_i128_pair:

+; SSE2: # %bb.0:

+; SSE2-NEXT: movq (%rdi), %rax

+; SSE2-NEXT: movq 8(%rdi), %rcx

+; SSE2-NEXT: xorq (%rsi), %rax

+; SSE2-NEXT: xorq 8(%rsi), %rcx

+; SSE2-NEXT: movq 24(%rdi), %rdx

+; SSE2-NEXT: movq 16(%rdi), %rdi

+; SSE2-NEXT: xorq 16(%rsi), %rdi

+; SSE2-NEXT: orq %rax, %rdi

+; SSE2-NEXT: xorq 24(%rsi), %rdx

+; SSE2-NEXT: orq %rcx, %rdx

+; SSE2-NEXT: xorl %eax, %eax

+; SSE2-NEXT: orq %rdi, %rdx

+; SSE2-NEXT: sete %al

+; SSE2-NEXT: retq

+; AVX2-LABEL: eq_i128_pair:

+; AVX2: # %bb.0:

+; AVX2-NEXT: movq (%rdi), %rax

+; AVX2-NEXT: movq 8(%rdi), %rcx

+; AVX2-NEXT: xorq (%rsi), %rax

+; AVX2-NEXT: xorq 8(%rsi), %rcx

+; AVX2-NEXT: movq 24(%rdi), %rdx

+; AVX2-NEXT: movq 16(%rdi), %rdi

+; AVX2-NEXT: xorq 16(%rsi), %rdi

+; AVX2-NEXT: orq %rax, %rdi

+; AVX2-NEXT: xorq 24(%rsi), %rdx

+; AVX2-NEXT: orq %rcx, %rdx

+; AVX2-NEXT: xorl %eax, %eax

+; AVX2-NEXT: orq %rdi, %rdx

+; AVX2-NEXT: sete %al

+; AVX2-NEXT: retq

+ %a0 = load i128, i128* %a

+ %b0 = load i128, i128* %b

+ %xor1 = xor i128 %a0, %b0

+ %ap1 = getelementptr i128, i128* %a, i128 1

+ %bp1 = getelementptr i128, i128* %b, i128 1

+ %a1 = load i128, i128* %ap1

+ %b1 = load i128, i128* %bp1

+ %xor2 = xor i128 %a1, %b1

+ %or = or i128 %xor1, %xor2

+ %cmp = icmp eq i128 %or, 0

+ %z = zext i1 %cmp to i32

+ ret i32 %z

+}

+; This test models the expansion of 'memcmp(a, b, 64) != 0'

+; if we allowed 2 pairs of 32-byte loads per block.

+define i32 @ne_i256_pair(i256* %a, i256* %b) {

+; SSE2-LABEL: ne_i256_pair:

+; SSE2: # %bb.0:

+; SSE2-NEXT: movq 16(%rdi), %r9

+; SSE2-NEXT: movq 24(%rdi), %r11

+; SSE2-NEXT: movq (%rdi), %r8

+; SSE2-NEXT: movq 8(%rdi), %r10

+; SSE2-NEXT: xorq 8(%rsi), %r10

+; SSE2-NEXT: xorq 24(%rsi), %r11

+; SSE2-NEXT: xorq (%rsi), %r8

+; SSE2-NEXT: xorq 16(%rsi), %r9

+; SSE2-NEXT: movq 48(%rdi), %rdx

+; SSE2-NEXT: movq 32(%rdi), %rax

+; SSE2-NEXT: movq 56(%rdi), %rcx

+; SSE2-NEXT: movq 40(%rdi), %rdi

+; SSE2-NEXT: xorq 40(%rsi), %rdi

+; SSE2-NEXT: xorq 56(%rsi), %rcx

+; SSE2-NEXT: orq %r11, %rcx

+; SSE2-NEXT: orq %rdi, %rcx

+; SSE2-NEXT: orq %r10, %rcx

+; SSE2-NEXT: xorq 32(%rsi), %rax

+; SSE2-NEXT: xorq 48(%rsi), %rdx

+; SSE2-NEXT: orq %r9, %rdx

+; SSE2-NEXT: orq %rax, %rdx

+; SSE2-NEXT: orq %r8, %rdx

+; SSE2-NEXT: xorl %eax, %eax

+; SSE2-NEXT: orq %rcx, %rdx

+; SSE2-NEXT: setne %al

+; SSE2-NEXT: retq

+; AVX2-LABEL: ne_i256_pair:

+; AVX2: # %bb.0:

+; AVX2-NEXT: movq 16(%rdi), %r9

+; AVX2-NEXT: movq 24(%rdi), %r11

+; AVX2-NEXT: movq (%rdi), %r8

+; AVX2-NEXT: movq 8(%rdi), %r10

+; AVX2-NEXT: xorq 8(%rsi), %r10

+; AVX2-NEXT: xorq 24(%rsi), %r11

+; AVX2-NEXT: xorq (%rsi), %r8

+; AVX2-NEXT: xorq 16(%rsi), %r9

+; AVX2-NEXT: movq 48(%rdi), %rdx

+; AVX2-NEXT: movq 32(%rdi), %rax

+; AVX2-NEXT: movq 56(%rdi), %rcx

+; AVX2-NEXT: movq 40(%rdi), %rdi

+; AVX2-NEXT: xorq 40(%rsi), %rdi

+; AVX2-NEXT: xorq 56(%rsi), %rcx

+; AVX2-NEXT: orq %r11, %rcx

+; AVX2-NEXT: orq %rdi, %rcx

+; AVX2-NEXT: orq %r10, %rcx

+; AVX2-NEXT: xorq 32(%rsi), %rax

+; AVX2-NEXT: xorq 48(%rsi), %rdx

+; AVX2-NEXT: orq %r9, %rdx

+; AVX2-NEXT: orq %rax, %rdx

+; AVX2-NEXT: orq %r8, %rdx

+; AVX2-NEXT: xorl %eax, %eax

+; AVX2-NEXT: orq %rcx, %rdx

+; AVX2-NEXT: setne %al

+; AVX2-NEXT: retq

+ %a0 = load i256, i256* %a

+ %b0 = load i256, i256* %b

+ %xor1 = xor i256 %a0, %b0

+ %ap1 = getelementptr i256, i256* %a, i256 1

+ %bp1 = getelementptr i256, i256* %b, i256 1

+ %a1 = load i256, i256* %ap1

+ %b1 = load i256, i256* %bp1

+ %xor2 = xor i256 %a1, %b1

+ %or = or i256 %xor1, %xor2

+ %cmp = icmp ne i256 %or, 0

+ %z = zext i1 %cmp to i32

+ ret i32 %z

+}

+; This test models the expansion of 'memcmp(a, b, 64) == 0'

+; if we allowed 2 pairs of 32-byte loads per block.

+define i32 @eq_i256_pair(i256* %a, i256* %b) {

+; SSE2-LABEL: eq_i256_pair:

+; SSE2: # %bb.0:

+; SSE2-NEXT: movq 16(%rdi), %r9

+; SSE2-NEXT: movq 24(%rdi), %r11

+; SSE2-NEXT: movq (%rdi), %r8

+; SSE2-NEXT: movq 8(%rdi), %r10

+; SSE2-NEXT: xorq 8(%rsi), %r10

+; SSE2-NEXT: xorq 24(%rsi), %r11

+; SSE2-NEXT: xorq (%rsi), %r8

+; SSE2-NEXT: xorq 16(%rsi), %r9

+; SSE2-NEXT: movq 48(%rdi), %rdx

+; SSE2-NEXT: movq 32(%rdi), %rax

+; SSE2-NEXT: movq 56(%rdi), %rcx

+; SSE2-NEXT: movq 40(%rdi), %rdi

+; SSE2-NEXT: xorq 40(%rsi), %rdi

+; SSE2-NEXT: xorq 56(%rsi), %rcx

+; SSE2-NEXT: orq %r11, %rcx

+; SSE2-NEXT: orq %rdi, %rcx

+; SSE2-NEXT: orq %r10, %rcx

+; SSE2-NEXT: xorq 32(%rsi), %rax

+; SSE2-NEXT: xorq 48(%rsi), %rdx

+; SSE2-NEXT: orq %r9, %rdx

+; SSE2-NEXT: orq %rax, %rdx

+; SSE2-NEXT: orq %r8, %rdx

+; SSE2-NEXT: xorl %eax, %eax

+; SSE2-NEXT: orq %rcx, %rdx

+; SSE2-NEXT: sete %al

+; SSE2-NEXT: retq

+; AVX2-LABEL: eq_i256_pair:

+; AVX2: # %bb.0:

+; AVX2-NEXT: movq 16(%rdi), %r9

+; AVX2-NEXT: movq 24(%rdi), %r11

+; AVX2-NEXT: movq (%rdi), %r8

+; AVX2-NEXT: movq 8(%rdi), %r10

+; AVX2-NEXT: xorq 8(%rsi), %r10

+; AVX2-NEXT: xorq 24(%rsi), %r11

+; AVX2-NEXT: xorq (%rsi), %r8

+; AVX2-NEXT: xorq 16(%rsi), %r9

+; AVX2-NEXT: movq 48(%rdi), %rdx

+; AVX2-NEXT: movq 32(%rdi), %rax

+; AVX2-NEXT: movq 56(%rdi), %rcx

+; AVX2-NEXT: movq 40(%rdi), %rdi

+; AVX2-NEXT: xorq 40(%rsi), %rdi

+; AVX2-NEXT: xorq 56(%rsi), %rcx

+; AVX2-NEXT: orq %r11, %rcx

+; AVX2-NEXT: orq %rdi, %rcx

+; AVX2-NEXT: orq %r10, %rcx

+; AVX2-NEXT: xorq 32(%rsi), %rax

+; AVX2-NEXT: xorq 48(%rsi), %rdx

+; AVX2-NEXT: orq %r9, %rdx

+; AVX2-NEXT: orq %rax, %rdx

+; AVX2-NEXT: orq %r8, %rdx

+; AVX2-NEXT: xorl %eax, %eax

+; AVX2-NEXT: orq %rcx, %rdx

+; AVX2-NEXT: sete %al

+; AVX2-NEXT: retq

+ %a0 = load i256, i256* %a

+ %b0 = load i256, i256* %b

+ %xor1 = xor i256 %a0, %b0

+ %ap1 = getelementptr i256, i256* %a, i256 1

+ %bp1 = getelementptr i256, i256* %b, i256 1

+ %a1 = load i256, i256* %ap1

+ %b1 = load i256, i256* %bp1

+ %xor2 = xor i256 %a1, %b1

+ %or = or i256 %xor1, %xor2

+ %cmp = icmp eq i256 %or, 0

+ %z = zext i1 %cmp to i32

+ ret i32 %z

+}

diff --git a/test/CodeGen/X86/win32-eh-available-externally.ll b/test/CodeGen/X86/win32-eh-available-externally.ll
new file mode 100644
index 000000000000..49da191de978
--- /dev/null
+++ b/test/CodeGen/X86/win32-eh-available-externally.ll

@@ -0,0 +1,28 @@

+; RUN: opt -S -x86-winehstate < %s | FileCheck %s --check-prefix=IR

+; RUN: llc < %s | FileCheck %s --check-prefix=ASM

+; IR-NOT: define.*__ehhandler

+; IR: define available_externally void @foo(void ()*)

+; IR-NOT: define.*__ehhandler

+; No code should be emitted.

+; ASM-NOT: __ehtable

+; ASM-NOT: __ehhandler

+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"

+target triple = "i686-pc-windows-msvc"

+declare i32 @__CxxFrameHandler3(...) unnamed_addr

+define available_externally void @foo(void ()*) personality i32 (...)* @__CxxFrameHandler3 {

+start:

+ invoke void %0()

+ to label %good unwind label %bad

+good: ; preds = %start

+ ret void

+bad: ; preds = %start

+ %cleanuppad = cleanuppad within none []

+ cleanupret from %cleanuppad unwind to caller

+}