diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Analysis/BasicAliasAnalysis.cpp | 13 | ||||
-rw-r--r-- | lib/Analysis/InstructionSimplify.cpp | 6 | ||||
-rw-r--r-- | lib/Analysis/MemorySSA.cpp | 9 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 19 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 4 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 53 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 13 | ||||
-rw-r--r-- | lib/Target/X86/X86FlagsCopyLowering.cpp | 17 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/SLPVectorizer.cpp | 8 |
10 files changed, 92 insertions, 54 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 1a24ae3dba15..f9ecbc043261 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -801,14 +801,15 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, const Value *Object = GetUnderlyingObject(Loc.Ptr, DL); - // If this is a tail call and Loc.Ptr points to a stack location, we know that - // the tail call cannot access or modify the local stack. - // We cannot exclude byval arguments here; these belong to the caller of - // the current function not to the current function, and a tail callee - // may reference them. + // Calls marked 'tail' cannot read or write allocas from the current frame + // because the current frame might be destroyed by the time they run. However, + // a tail call may use an alloca with byval. Calling with byval copies the + // contents of the alloca into argument registers or stack slots, so there is + // no lifetime issue. if (isa<AllocaInst>(Object)) if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) - if (CI->isTailCall()) + if (CI->isTailCall() && + !CI->getAttributes().hasAttrSomewhere(Attribute::ByVal)) return ModRefInfo::NoModRef; // If the pointer is to a locally allocated object that does not escape, diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index f991291f565a..5e72798d459a 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1338,7 +1338,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); const unsigned Width = Op0->getType()->getScalarSizeInBits(); const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros(); - if (EffWidthY <= ShRAmt->getZExtValue()) + if (ShRAmt->uge(EffWidthY)) return X; } @@ -1878,9 +1878,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)), m_Value(XShifted)), m_Value(Y)))) { - const unsigned ShftCnt = ShAmt->getZExtValue(); - const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); const unsigned Width = Op0->getType()->getScalarSizeInBits(); + const unsigned ShftCnt = ShAmt->getLimitedValue(Width); + const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros(); if (EffWidthY <= ShftCnt) { const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI, diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp index f57d490ce96e..b38c0c4f1439 100644 --- a/lib/Analysis/MemorySSA.cpp +++ b/lib/Analysis/MemorySSA.cpp @@ -258,13 +258,18 @@ static ClobberAlias instructionClobbersQuery(MemoryDef *MD, if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { // These intrinsics will show up as affecting memory, but they are just - // markers. + // markers, mostly. + // + // FIXME: We probably don't actually want MemorySSA to model these at all + // (including creating MemoryAccesses for them): we just end up inventing + // clobbers where they don't really exist at all. Please see D43269 for + // context. switch (II->getIntrinsicID()) { case Intrinsic::lifetime_start: if (UseCS) return {false, NoAlias}; AR = AA.alias(MemoryLocation(II->getArgOperand(1)), UseLoc); - return {AR == MustAlias, AR}; + return {AR != NoAlias, AR}; case Intrinsic::lifetime_end: case Intrinsic::invariant_start: case Intrinsic::invariant_end: diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 9aa0ea15f3b7..2e6f6edbce55 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1778,15 +1778,16 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) { SDValue Op = N->getOperand(0); EVT OpVT = Op->getValueType(0); - EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); - assert (IVT == N->getValueType(0) && "Bitcast to type of different size"); - SDValue Promoted = GetPromotedFloat(N->getOperand(0)); EVT PromotedVT = Promoted->getValueType(0); // Convert the promoted float value to the desired IVT. - return DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), IVT, - Promoted); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + SDValue Convert = DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), + IVT, Promoted); + // The final result type might not be an scalar so we need a bitcast. The + // bitcast will be further legalized if needed. + return DAG.getBitcast(N->getValueType(0), Convert); } // Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by @@ -1941,8 +1942,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) { EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, - N->getOperand(0)); + // Input type isn't guaranteed to be a scalar int so bitcast if not. The + // bitcast will be legalized further if necessary. + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), + N->getOperand(0).getValueType().getSizeInBits()); + SDValue Cast = DAG.getBitcast(IVT, N->getOperand(0)); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, Cast); } SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 63a1ea13a5f5..133831fa76fb 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -269,8 +269,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); case TargetLowering::TypePromoteFloat: { // Convert the promoted float by hand. - SDValue PromotedOp = GetPromotedFloat(InOp); - return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp); + if (!NOutVT.isVector()) + return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, GetPromotedFloat(InOp)); break; } case TargetLowering::TypeExpandInteger: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 48e03c6da68f..9e38e675d13a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2374,7 +2374,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx); + APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); computeKnownBits(Src, Known, DemandedSrc, Depth + 1); } else { computeKnownBits(Src, Known, Depth + 1); @@ -3533,7 +3533,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx); + APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); } return ComputeNumSignBits(Src, Depth + 1); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 5f6b6010cae2..c1c15514c09a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7768,10 +7768,29 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); - // FIXME: Why don't we do this for inline asms with MRVs? - if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType()); - + llvm::Type *CSResultType = CS.getType(); + unsigned numRet; + ArrayRef<Type *> ResultTypes; + SmallVector<SDValue, 1> ResultValues(1); + if (CSResultType->isSingleValueType()) { + numRet = 1; + ResultValues[0] = Val; + ResultTypes = makeArrayRef(CSResultType); + } else { + numRet = CSResultType->getNumContainedTypes(); + assert(Val->getNumOperands() == numRet && + "Mismatch in number of output operands in asm result"); + ResultTypes = CSResultType->subtypes(); + ArrayRef<SDUse> ValueUses = Val->ops(); + ResultValues.resize(numRet); + std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(), + [](const SDUse &u) -> SDValue { return u.get(); }); + } + SmallVector<EVT, 1> ResultVTs(numRet); + for (unsigned i = 0; i < numRet; i++) { + EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), ResultTypes[i]); + SDValue Val = ResultValues[i]; + assert(ResultTypes[i]->isSized() && "Unexpected unsized type"); // If the type of the inline asm call site return value is different but // has same size as the type of the asm output bitcast it. One example // of this is for vectors with different width / number of elements. @@ -7782,22 +7801,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // This can also happen for a return value that disagrees with the // register class it is put in, eg. a double in a general-purpose // register on a 32-bit machine. - if (ResultType != Val.getValueType() && - ResultType.getSizeInBits() == Val.getValueSizeInBits()) { - Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), - ResultType, Val); - - } else if (ResultType != Val.getValueType() && - ResultType.isInteger() && Val.getValueType().isInteger()) { - // If a result value was tied to an input value, the computed result may - // have a wider width than the expected result. Extract the relevant - // portion. - Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val); + if (ResultVT != Val.getValueType() && + ResultVT.getSizeInBits() == Val.getValueSizeInBits()) + Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, Val); + else if (ResultVT != Val.getValueType() && ResultVT.isInteger() && + Val.getValueType().isInteger()) { + // If a result value was tied to an input value, the computed result + // may have a wider width than the expected result. Extract the + // relevant portion. + Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, Val); } - assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); + assert(ResultVT == Val.getValueType() && "Asm result value mismatch!"); + ResultVTs[i] = ResultVT; + ResultValues[i] = Val; } + Val = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + DAG.getVTList(ResultVTs), ResultValues); setValue(CS.getInstruction(), Val); // Don't need to use this as a chain in this case. if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f6e13aee968a..331dbcbbe060 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -12007,10 +12007,15 @@ static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) { auto isSExtOfVecExtract = [&](SDValue Op) -> bool { if (!Op) return false; - if (Op.getOpcode() != ISD::SIGN_EXTEND) + if (Op.getOpcode() != ISD::SIGN_EXTEND && + Op.getOpcode() != ISD::SIGN_EXTEND_INREG) return false; + // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value + // of the right width. SDValue Extract = Op.getOperand(0); + if (Extract.getOpcode() == ISD::ANY_EXTEND) + Extract = Extract.getOperand(0); if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return false; @@ -12098,8 +12103,10 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, return Reduced; // If we're building a vector out of extended elements from another vector - // we have P9 vector integer extend instructions. - if (Subtarget.hasP9Altivec()) { + // we have P9 vector integer extend instructions. The code assumes legal + // input types (i.e. it can't handle things like v4i16) so do not run before + // legalization. + if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) { Reduced = combineBVOfVecSExt(N, DAG); if (Reduced) return Reduced; diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp index c17c51a7aeac..d2f2f21542a9 100644 --- a/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -97,6 +97,7 @@ public: private: MachineRegisterInfo *MRI; + const X86Subtarget *Subtarget; const X86InstrInfo *TII; const TargetRegisterInfo *TRI; const TargetRegisterClass *PromoteRC; @@ -346,10 +347,10 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName() << " **********\n"); - auto &Subtarget = MF.getSubtarget<X86Subtarget>(); + Subtarget = &MF.getSubtarget<X86Subtarget>(); MRI = &MF.getRegInfo(); - TII = Subtarget.getInstrInfo(); - TRI = Subtarget.getRegisterInfo(); + TII = Subtarget->getInstrInfo(); + TRI = Subtarget->getRegisterInfo(); MDT = &getAnalysis<MachineDominatorTree>(); PromoteRC = &X86::GR8RegClass; @@ -960,10 +961,14 @@ void X86FlagsCopyLoweringPass::rewriteSetCarryExtended( .addReg(Reg) .addImm(SubRegIdx[OrigRegSize]); } else if (OrigRegSize > TargetRegSize) { - BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG), + if (TargetRegSize == 1 && !Subtarget->is64Bit()) { + // Need to constrain the register class. + MRI->constrainRegClass(Reg, &X86::GR32_ABCDRegClass); + } + + BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg) - .addReg(Reg) - .addImm(SubRegIdx[TargetRegSize]); + .addReg(Reg, 0, SubRegIdx[TargetRegSize]); } else { BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg) .addReg(Reg); diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 5c2efe885e22..32df6d581577 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3109,14 +3109,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } if (NeedToShuffleReuses) { // TODO: Merge this shuffle with the ReorderShuffleMask. - if (!E->ReorderIndices.empty()) + if (E->ReorderIndices.empty()) Builder.SetInsertPoint(VL0); - else if (auto *I = dyn_cast<Instruction>(V)) - Builder.SetInsertPoint(I->getParent(), - std::next(I->getIterator())); - else - Builder.SetInsertPoint(&F->getEntryBlock(), - F->getEntryBlock().getFirstInsertionPt()); V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle"); } |