field | value | date
---|---|---
author | Dimitry Andric <dim@FreeBSD.org> | 2015-06-09 19:06:30 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-06-09 19:06:30 +0000
commit | 85d8b2bbe386bcfe669575d05b61482d7be07e5d (patch) |
tree | 1dc5e75ab222a9ead44c699eceafab7a6ca7b310 /lib |
parent | 5a5ac124e1efaf208671f01c46edb15f29ed2a0b (diff) |
download | src-85d8b2bbe386bcfe669575d05b61482d7be07e5d.tar.gz, src-85d8b2bbe386bcfe669575d05b61482d7be07e5d.zip |
Vendor import of llvm trunk r239412 (tag: vendor/llvm/llvm-trunk-r239412)
Notes:
svn path=/vendor/llvm/dist/; revision=284184
svn path=/vendor/llvm/llvm-trunk-r239412/; revision=284185; tag=vendor/llvm/llvm-trunk-r239412
Diffstat (limited to 'lib')
461 files changed, 18070 insertions, 8114 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index f54e234bbead..ce46d5300517 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -93,7 +93,7 @@ AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) { // location this memory access defines. The best we can say // is that if the call references what this instruction // defines, it must be clobbered by this location. - const AliasAnalysis::Location DefLoc = AA->getLocation(I); + const AliasAnalysis::Location DefLoc = MemoryLocation::get(I); if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef) return AliasAnalysis::ModRef; } @@ -267,78 +267,6 @@ AliasAnalysis::getModRefBehavior(const Function *F) { // AliasAnalysis non-virtual helper method implementation //===----------------------------------------------------------------------===// -AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) { - AAMDNodes AATags; - LI->getAAMetadata(AATags); - - return Location(LI->getPointerOperand(), - getTypeStoreSize(LI->getType()), AATags); -} - -AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) { - AAMDNodes AATags; - SI->getAAMetadata(AATags); - - return Location(SI->getPointerOperand(), - getTypeStoreSize(SI->getValueOperand()->getType()), AATags); -} - -AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) { - AAMDNodes AATags; - VI->getAAMetadata(AATags); - - return Location(VI->getPointerOperand(), UnknownSize, AATags); -} - -AliasAnalysis::Location -AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) { - AAMDNodes AATags; - CXI->getAAMetadata(AATags); - - return Location(CXI->getPointerOperand(), - getTypeStoreSize(CXI->getCompareOperand()->getType()), - AATags); -} - -AliasAnalysis::Location -AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) { - AAMDNodes AATags; - RMWI->getAAMetadata(AATags); - - return Location(RMWI->getPointerOperand(), - getTypeStoreSize(RMWI->getValOperand()->getType()), AATags); -} - -AliasAnalysis::Location -AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) { - uint64_t Size = UnknownSize; - if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) - Size = C->getValue().getZExtValue(); - - // memcpy/memmove can have AA tags. For memcpy, they apply - // to both the source and the destination. - AAMDNodes AATags; - MTI->getAAMetadata(AATags); - - return Location(MTI->getRawSource(), Size, AATags); -} - -AliasAnalysis::Location -AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) { - uint64_t Size = UnknownSize; - if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) - Size = C->getValue().getZExtValue(); - - // memcpy/memmove can have AA tags. For memcpy, they apply - // to both the source and the destination. - AAMDNodes AATags; - MTI->getAAMetadata(AATags); - - return Location(MTI->getRawDest(), Size, AATags); -} - - - AliasAnalysis::ModRefResult AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { // Be conservative in the face of volatile/atomic. @@ -347,7 +275,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { // If the load address doesn't alias the given address, it doesn't read // or write the specified memory. - if (Loc.Ptr && !alias(getLocation(L), Loc)) + if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc)) return NoModRef; // Otherwise, a load just reads. 
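An editorial aside on the hunks above: the core API change in this patch replaces the member helpers `AliasAnalysis::getLocation(...)` with the static factory `MemoryLocation::get(...)`. A minimal call-site sketch against the 3.7-era API (the function name is ours, not from the patch):

```cpp
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

AliasAnalysis::AliasResult aliasLoadStore(AliasAnalysis &AA,
                                          const LoadInst *LI,
                                          const StoreInst *SI) {
  // Before this patch the locations came from the analysis object itself:
  //   AA.alias(AA.getLocation(LI), AA.getLocation(SI));
  // MemoryLocation::get is a static factory (see the new
  // lib/Analysis/MemoryLocation.cpp later in this diff), so building a
  // location no longer needs an AliasAnalysis instance.
  return AA.alias(MemoryLocation::get(LI), MemoryLocation::get(SI));
}
```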
@@ -363,7 +291,7 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) { if (Loc.Ptr) { // If the store address cannot alias the pointer in question, then the // specified memory cannot be modified by the store. - if (!alias(getLocation(S), Loc)) + if (!alias(MemoryLocation::get(S), Loc)) return NoModRef; // If the pointer is a pointer to constant memory, then it could not have @@ -383,7 +311,7 @@ AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) { if (Loc.Ptr) { // If the va_arg address cannot alias the pointer in question, then the // specified memory cannot be accessed by the va_arg. - if (!alias(getLocation(V), Loc)) + if (!alias(MemoryLocation::get(V), Loc)) return NoModRef; // If the pointer is a pointer to constant memory, then it could not have @@ -403,7 +331,7 @@ AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) { return ModRef; // If the cmpxchg address does not alias the location, it does not access it. - if (Loc.Ptr && !alias(getLocation(CX), Loc)) + if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc)) return NoModRef; return ModRef; @@ -416,7 +344,7 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) { return ModRef; // If the atomicrmw address does not alias the location, it does not access it. - if (Loc.Ptr && !alias(getLocation(RMW), Loc)) + if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc)) return NoModRef; return ModRef; diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 273eacc16e72..dd6a3a0715e1 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -219,8 +219,8 @@ bool AAEval::runOnFunction(Function &F) { I1 != E; ++I1) { for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end(); I2 != E2; ++I2) { - switch (AA.alias(AA.getLocation(cast<LoadInst>(*I1)), - AA.getLocation(cast<StoreInst>(*I2)))) { + switch (AA.alias(MemoryLocation::get(cast<LoadInst>(*I1)), + MemoryLocation::get(cast<StoreInst>(*I2)))) { case AliasAnalysis::NoAlias: PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); @@ -245,8 +245,8 @@ bool AAEval::runOnFunction(Function &F) { for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end(); I1 != E; ++I1) { for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) { - switch (AA.alias(AA.getLocation(cast<StoreInst>(*I1)), - AA.getLocation(cast<StoreInst>(*I2)))) { + switch (AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)), + MemoryLocation::get(cast<StoreInst>(*I2)))) { case AliasAnalysis::NoAlias: PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 50890c17f2eb..12c1c7d4af90 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -130,7 +130,7 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) { if (UnknownInsts.empty()) addRef(); - UnknownInsts.push_back(I); + UnknownInsts.emplace_back(I); if (!I->mayWriteToMemory()) { AliasTy = MayAlias; diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 091943bfc7b3..430b41241edf 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -543,6 +543,10 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { return false; } +void 
BranchProbabilityInfo::releaseMemory() { + Weights.clear(); +} + void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const { OS << "---- Branch Probabilities ----\n"; // We print the probabilities from the last function the analysis ran over, diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 32fe9b9495d1..b22ee7e24931 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -42,6 +42,7 @@ add_llvm_library(LLVMAnalysis MemDerefPrinter.cpp MemoryBuiltins.cpp MemoryDependenceAnalysis.cpp + MemoryLocation.cpp ModuleDebugInfoPrinter.cpp NoAliasAnalysis.cpp PHITransAddr.cpp diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 808a38b6346b..b16cdfef3375 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -779,23 +779,56 @@ void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, } } -void DependenceAnalysis::unifySubscriptType(Subscript *Pair) { - const SCEV *Src = Pair->Src; - const SCEV *Dst = Pair->Dst; - IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); - IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); - if (SrcTy == nullptr || DstTy == nullptr) { - assert(SrcTy == DstTy && "This function only unify integer types and " - "expect Src and Dst share the same type " - "otherwise."); - return; +void DependenceAnalysis::unifySubscriptType(ArrayRef<Subscript *> Pairs) { + + unsigned widestWidthSeen = 0; + Type *widestType; + + // Go through each pair and find the widest bit to which we need + // to extend all of them. + for (unsigned i = 0; i < Pairs.size(); i++) { + const SCEV *Src = Pairs[i]->Src; + const SCEV *Dst = Pairs[i]->Dst; + IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); + IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); + if (SrcTy == nullptr || DstTy == nullptr) { + assert(SrcTy == DstTy && "This function only unify integer types and " + "expect Src and Dst share the same type " + "otherwise."); + continue; + } + if (SrcTy->getBitWidth() > widestWidthSeen) { + widestWidthSeen = SrcTy->getBitWidth(); + widestType = SrcTy; + } + if (DstTy->getBitWidth() > widestWidthSeen) { + widestWidthSeen = DstTy->getBitWidth(); + widestType = DstTy; + } } - if (SrcTy->getBitWidth() > DstTy->getBitWidth()) { - // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst). - Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy); - } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) { - // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src). - Pair->Src = SE->getSignExtendExpr(Src, DstTy); + + + assert(widestWidthSeen > 0); + + // Now extend each pair to the widest seen. 
+ for (unsigned i = 0; i < Pairs.size(); i++) { + const SCEV *Src = Pairs[i]->Src; + const SCEV *Dst = Pairs[i]->Dst; + IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); + IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); + if (SrcTy == nullptr || DstTy == nullptr) { + assert(SrcTy == DstTy && "This function only unify integer types and " + "expect Src and Dst share the same type " + "otherwise."); + continue; + } + if (SrcTy->getBitWidth() < widestWidthSeen) + // Sign-extend Src to widestType + Pairs[i]->Src = SE->getSignExtendExpr(Src, widestType); + if (DstTy->getBitWidth() < widestWidthSeen) { + // Sign-extend Dst to widestType + Pairs[i]->Dst = SE->getSignExtendExpr(Dst, widestType); + } } } @@ -2937,7 +2970,7 @@ const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { // return the coefficient (the step) // corresponding to the specified loop. // If there isn't one, return 0. -// For example, given a*i + b*j + c*k, zeroing the coefficient +// For example, given a*i + b*j + c*k, finding the coefficient // corresponding to the j loop would yield b. const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, const Loop *TargetLoop) const { @@ -3574,13 +3607,16 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, SmallBitVector Sivs(Pairs); SmallBitVector Mivs(Pairs); SmallBitVector ConstrainedLevels(MaxLevels + 1); + SmallVector<Subscript *, 4> PairsInGroup; for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { DEBUG(dbgs() << SJ << " "); if (Pair[SJ].Classification == Subscript::SIV) Sivs.set(SJ); else Mivs.set(SJ); + PairsInGroup.push_back(&Pair[SJ]); } + unifySubscriptType(PairsInGroup); DEBUG(dbgs() << "}\n"); while (Sivs.any()) { bool Changed = false; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 097b99eb0a5e..ec56d888dc2f 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -3144,6 +3144,90 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, RecursionLimit); } +/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is +/// replaced with RepOp. +static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const Query &Q, + unsigned MaxRecurse) { + // Trivial replacement. + if (V == Op) + return RepOp; + + auto *I = dyn_cast<Instruction>(V); + if (!I) + return nullptr; + + // If this is a binary operator, try to simplify it with the replaced op. + if (auto *B = dyn_cast<BinaryOperator>(I)) { + // Consider: + // %cmp = icmp eq i32 %x, 2147483647 + // %add = add nsw i32 %x, 1 + // %sel = select i1 %cmp, i32 -2147483648, i32 %add + // + // We can't replace %sel with %add unless we strip away the flags. + if (isa<OverflowingBinaryOperator>(B)) + if (B->hasNoSignedWrap() || B->hasNoUnsignedWrap()) + return nullptr; + if (isa<PossiblyExactOperator>(B)) + if (B->isExact()) + return nullptr; + + if (MaxRecurse) { + if (B->getOperand(0) == Op) + return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), Q, + MaxRecurse - 1); + if (B->getOperand(1) == Op) + return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, Q, + MaxRecurse - 1); + } + } + + // Same for CmpInsts. 
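The rewritten `unifySubscriptType` above makes two passes: find the widest integer width across all subscript pairs, then sign-extend every narrower `Src`/`Dst` to it. A self-contained analog on plain integers (our own toy types, not the SCEV-based original):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Toy stand-in for a Subscript pair: a value plus the bit width it occupies.
struct Pair { int64_t Src, Dst; unsigned SrcBits, DstBits; };

// Sign-extend a FromBits-wide value stored in an int64_t.
static int64_t signExtend(int64_t V, unsigned FromBits) {
  uint64_t SignBit = 1ULL << (FromBits - 1);
  uint64_t U = static_cast<uint64_t>(V) & ((SignBit << 1) - 1);
  return static_cast<int64_t>((U ^ SignBit) - SignBit);
}

void unifyWidths(std::vector<Pair> &Pairs) {
  // Pass 1: the widest width seen across all pairs.
  unsigned Widest = 0;
  for (const Pair &P : Pairs)
    Widest = std::max({Widest, P.SrcBits, P.DstBits});

  // Pass 2: sign-extend each narrower side to that width (the real code
  // calls ScalarEvolution::getSignExtendExpr here).
  for (Pair &P : Pairs) {
    if (P.SrcBits < Widest) { P.Src = signExtend(P.Src, P.SrcBits); P.SrcBits = Widest; }
    if (P.DstBits < Widest) { P.Dst = signExtend(P.Dst, P.DstBits); P.DstBits = Widest; }
  }
}
```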
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) { + if (MaxRecurse) { + if (C->getOperand(0) == Op) + return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), Q, + MaxRecurse - 1); + if (C->getOperand(1) == Op) + return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, Q, + MaxRecurse - 1); + } + } + + // TODO: We could hand off more cases to instsimplify here. + + // If all operands are constant after substituting Op for RepOp then we can + // constant fold the instruction. + if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) { + // Build a list of all constant operands. + SmallVector<Constant *, 8> ConstOps; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (I->getOperand(i) == Op) + ConstOps.push_back(CRepOp); + else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i))) + ConstOps.push_back(COp); + else + break; + } + + // All operands were constants, fold it. + if (ConstOps.size() == I->getNumOperands()) { + if (CmpInst *C = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0], + ConstOps[1], Q.DL, Q.TLI); + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(ConstOps[0], Q.DL); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps, + Q.DL, Q.TLI); + } + } + + return nullptr; +} + /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold /// the result. If not, this returns null. static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, @@ -3172,29 +3256,28 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X return TrueVal; - const auto *ICI = dyn_cast<ICmpInst>(CondVal); - unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits(); - if (ICI && BitWidth) { + if (const auto *ICI = dyn_cast<ICmpInst>(CondVal)) { + unsigned BitWidth = Q.DL.getTypeSizeInBits(TrueVal->getType()); ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); APInt MinSignedValue = APInt::getSignBit(BitWidth); Value *X; const APInt *Y; bool TrueWhenUnset; bool IsBitTest = false; if (ICmpInst::isEquality(Pred) && - match(ICI->getOperand(0), m_And(m_Value(X), m_APInt(Y))) && - match(ICI->getOperand(1), m_Zero())) { + match(CmpLHS, m_And(m_Value(X), m_APInt(Y))) && + match(CmpRHS, m_Zero())) { IsBitTest = true; TrueWhenUnset = Pred == ICmpInst::ICMP_EQ; - } else if (Pred == ICmpInst::ICMP_SLT && - match(ICI->getOperand(1), m_Zero())) { - X = ICI->getOperand(0); + } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) { + X = CmpLHS; Y = &MinSignedValue; IsBitTest = true; TrueWhenUnset = false; - } else if (Pred == ICmpInst::ICMP_SGT && - match(ICI->getOperand(1), m_AllOnes())) { - X = ICI->getOperand(0); + } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) { + X = CmpLHS; Y = &MinSignedValue; IsBitTest = true; TrueWhenUnset = true; @@ -3225,6 +3308,50 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, return TrueWhenUnset ? TrueVal : FalseVal; } } + if (ICI->hasOneUse()) { + const APInt *C; + if (match(CmpRHS, m_APInt(C))) { + // X < MIN ? T : F --> F + if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue()) + return FalseVal; + // X < MIN ? T : F --> F + if (Pred == ICmpInst::ICMP_ULT && C->isMinValue()) + return FalseVal; + // X > MAX ? T : F --> F + if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue()) + return FalseVal; + // X > MAX ? 
T : F --> F + if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue()) + return FalseVal; + } + } + + // If we have an equality comparison then we know the value in one of the + // arms of the select. See if substituting this value into the arm and + // simplifying the result yields the same value as the other arm. + if (Pred == ICmpInst::ICMP_EQ) { + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + TrueVal) + return FalseVal; + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + FalseVal) + return FalseVal; + } else if (Pred == ICmpInst::ICMP_NE) { + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + FalseVal) + return TrueVal; + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == + TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == + TrueVal) + return TrueVal; + } } return nullptr; diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index b70de00db04b..c661c7b87dcb 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -177,15 +177,21 @@ void LoopAccessInfo::RuntimePointerCheck::print( } } -bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( +unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks( const SmallVectorImpl<int> *PtrPartition) const { unsigned NumPointers = Pointers.size(); + unsigned CheckCount = 0; for (unsigned I = 0; I < NumPointers; ++I) for (unsigned J = I + 1; J < NumPointers; ++J) if (needsChecking(I, J, PtrPartition)) - return true; - return false; + CheckCount++; + return CheckCount; +} + +bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( + const SmallVectorImpl<int> *PtrPartition) const { + return getNumberOfChecks(PtrPartition) != 0; } namespace { @@ -220,10 +226,11 @@ public: } /// \brief Check whether we can check the pointers at runtime for - /// non-intersection. + /// non-intersection. Returns true when we have 0 pointers + /// (a check on 0 pointers for non-intersection will always return true). bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck, - unsigned &NumComparisons, ScalarEvolution *SE, - Loop *TheLoop, const ValueToValueMap &Strides, + bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop, + const ValueToValueMap &Strides, bool ShouldCheckStride = false); /// \brief Goes over all memory accesses, checks whether a RT check is needed @@ -289,29 +296,23 @@ static bool hasComputableBounds(ScalarEvolution *SE, return AR->isAffine(); } -/// \brief Check the stride of the pointer and ensure that it does not wrap in -/// the address space. -static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap); - bool AccessAnalysis::canCheckPtrAtRT( - LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons, + LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, bool ShouldCheckStride) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. 
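The `ICMP_EQ`/`ICMP_NE` cases above are the payoff for `SimplifyWithOpReplaced`: under an equality guard, one arm sees a known value, and if substituting that value makes the arm collapse into the other arm, the whole select folds away. In source-level terms (illustration only):

```cpp
int redundantSelect(int X) {
  // select (X == 7), 7, X
  // Substituting 7 for X in the false arm "X" yields 7, which is exactly
  // the true arm, so the select returns X unconditionally.
  return X == 7 ? 7 : X; // ==> return X;
}
```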
bool CanDoRT = true; + NeedRTCheck = false; + if (!IsRTCheckNeeded) return true; + bool IsDepCheckNeeded = isDependencyCheckNeeded(); - NumComparisons = 0; // We assign a consecutive id to access from different alias sets. // Accesses between different groups doesn't need to be checked. unsigned ASId = 1; for (auto &AS : AST) { - unsigned NumReadPtrChecks = 0; - unsigned NumWritePtrChecks = 0; - // We assign consecutive id to access from different dependence sets. // Accesses within the same set don't need a runtime check. unsigned RunningDepId = 1; @@ -322,11 +323,6 @@ bool AccessAnalysis::canCheckPtrAtRT( bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); MemAccessInfo Access(Ptr, IsWrite); - if (IsWrite) - ++NumWritePtrChecks; - else - ++NumReadPtrChecks; - if (hasComputableBounds(SE, StridesMap, Ptr) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. @@ -354,16 +350,15 @@ bool AccessAnalysis::canCheckPtrAtRT( } } - if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2) - NumComparisons += 0; // Only one dependence set. - else { - NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks + - NumWritePtrChecks - 1)); - } - ++ASId; } + // We need a runtime check if there are any accesses that need checking. + // However, some accesses cannot be checked (for example because we + // can't determine their bounds). In these cases we would need a check + // but wouldn't be able to add it. + NeedRTCheck = !CanDoRT || RtCheck.needsAnyChecking(nullptr); + // If the pointers that we would use for the bounds comparison have different // address spaces, assume the values aren't directly comparable, so we can't // use them for the runtime check. We also have to assume they could @@ -510,8 +505,8 @@ static bool isInBoundsGep(Value *Ptr) { } /// \brief Check whether the access through \p Ptr has a constant stride. -static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap) { +int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, + const ValueToValueMap &StridesMap) { const Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -678,6 +673,42 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance, return false; } +/// \brief Check the dependence for two accesses with the same stride \p Stride. +/// \p Distance is the positive distance and \p TypeByteSize is type size in +/// bytes. +/// +/// \returns true if they are independent. +static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride, + unsigned TypeByteSize) { + assert(Stride > 1 && "The stride must be greater than 1"); + assert(TypeByteSize > 0 && "The type size in byte must be non-zero"); + assert(Distance > 0 && "The distance must be non-zero"); + + // Skip if the distance is not multiple of type byte size. + if (Distance % TypeByteSize) + return false; + + unsigned ScaledDist = Distance / TypeByteSize; + + // No dependence if the scaled distance is not multiple of the stride. + // E.g. + // for (i = 0; i < 1024 ; i += 4) + // A[i+2] = A[i] + 1; + // + // Two accesses in memory (scaled distance is 2, stride is 4): + // | A[0] | | | | A[4] | | | | + // | | | A[2] | | | | A[6] | | + // + // E.g. 
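The `getNumberOfChecks` introduced in this LoopAccessAnalysis.cpp hunk replaces the old `NumComparisons` bookkeeping by simply counting, over all unordered pointer pairs, the ones that need a runtime overlap check. A minimal sketch of that counting scheme (`needsChecking` is a stand-in predicate, not the LLVM member function):

```cpp
#include <functional>

unsigned countChecks(unsigned NumPointers,
                     const std::function<bool(unsigned, unsigned)> &needsChecking) {
  unsigned CheckCount = 0;
  for (unsigned I = 0; I < NumPointers; ++I)
    for (unsigned J = I + 1; J < NumPointers; ++J) // each unordered pair once
      if (needsChecking(I, J))
        ++CheckCount;
  return CheckCount; // needsAnyChecking() is then just CheckCount != 0
}
```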
+ // for (i = 0; i < 1024 ; i += 3) + // A[i+4] = A[i] + 1; + // + // Two accesses in memory (scaled distance is 4, stride is 3): + // | A[0] | | | A[3] | | | A[6] | | | + // | | | | | A[4] | | | A[7] | | + return ScaledDist % Stride; +} + MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, unsigned BIdx, @@ -778,34 +809,87 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, unsigned Distance = (unsigned) Val.getZExtValue(); + unsigned Stride = std::abs(StrideAPtr); + if (Stride > 1 && + areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) + return Dependence::NoDep; + // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? VectorizerParams::VectorizationFactor : 1); unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ? VectorizerParams::VectorizationInterleave : 1); + // The minimum number of iterations for a vectorized/unrolled version. + unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U); + + // It's not vectorizable if the distance is smaller than the minimum distance + // needed for a vectroized/unrolled version. Vectorizing one iteration in + // front needs TypeByteSize * Stride. Vectorizing the last iteration needs + // TypeByteSize (No need to plus the last gap distance). + // + // E.g. Assume one char is 1 byte in memory and one int is 4 bytes. + // foo(int *A) { + // int *B = (int *)((char *)A + 14); + // for (i = 0 ; i < 1024 ; i += 2) + // B[i] = A[i] + 1; + // } + // + // Two accesses in memory (stride is 2): + // | A[0] | | A[2] | | A[4] | | A[6] | | + // | B[0] | | B[2] | | B[4] | + // + // Distance needs for vectorizing iterations except the last iteration: + // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4. + // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4. + // + // If MinNumIter is 2, it is vectorizable as the minimum distance needed is + // 12, which is less than distance. + // + // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4), + // the minimum distance needed is 28, which is greater than distance. It is + // not safe to do vectorization. + unsigned MinDistanceNeeded = + TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; + if (MinDistanceNeeded > Distance) { + DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance + << '\n'); + return Dependence::Backward; + } - // The distance must be bigger than the size needed for a vectorized version - // of the operation and the size of the vectorized operation must not be - // bigger than the currrent maximum size. - if (Distance < 2*TypeByteSize || - 2*TypeByteSize > MaxSafeDepDistBytes || - Distance < TypeByteSize * ForcedUnroll * ForcedFactor) { - DEBUG(dbgs() << "LAA: Failure because of Positive distance " - << Val.getSExtValue() << '\n'); + // Unsafe if the minimum distance needed is greater than max safe distance. + if (MinDistanceNeeded > MaxSafeDepDistBytes) { + DEBUG(dbgs() << "LAA: Failure because it needs at least " + << MinDistanceNeeded << " size in bytes"); return Dependence::Backward; } // Positive distance bigger than max vectorization factor. - MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ? - Distance : MaxSafeDepDistBytes; + // FIXME: Should use max factor instead of max distance in bytes, which could + // not handle different types. + // E.g. 
Assume one char is 1 byte in memory and one int is 4 bytes. + // void foo (int *A, char *B) { + // for (unsigned i = 0; i < 1024; i++) { + // A[i+2] = A[i] + 1; + // B[i+2] = B[i] + 1; + // } + // } + // + // This case is currently unsafe according to the max safe distance. If we + // analyze the two accesses on array B, the max safe dependence distance + // is 2. Then we analyze the accesses on array A, the minimum distance needed + // is 8, which is less than 2 and forbidden vectorization, But actually + // both A and B could be vectorized by 2 iterations. + MaxSafeDepDistBytes = + Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes; bool IsTrueDataDependence = (!AIsWrite && BIsWrite); if (IsTrueDataDependence && couldPreventStoreLoadForward(Distance, TypeByteSize)) return Dependence::BackwardVectorizableButPreventsForwarding; - DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() << - " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n'); + DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() + << " with max VF = " + << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n'); return Dependence::BackwardVectorizable; } @@ -1066,7 +1150,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (Seen.insert(Ptr).second) { ++NumReadWrites; - AliasAnalysis::Location Loc = AA->getLocation(ST); + AliasAnalysis::Location Loc = MemoryLocation::get(ST); // The TBAA metadata could have a control dependency on the predication // condition, so we cannot rely on it when determining whether or not we // need runtime pointer checks. @@ -1102,7 +1186,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { IsReadOnlyPtr = true; } - AliasAnalysis::Location Loc = AA->getLocation(LD); + AliasAnalysis::Location Loc = MemoryLocation::get(LD); // The TBAA metadata could have a control dependency on the predication // condition, so we cannot rely on it when determining whether or not we // need runtime pointer checks. @@ -1123,22 +1207,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Build dependence sets and check whether we need a runtime pointer bounds // check. Accesses.buildDependenceSets(); - bool NeedRTCheck = Accesses.isRTCheckNeeded(); // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. - bool CanDoRT = false; - if (NeedRTCheck) - CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop, - Strides); - - DEBUG(dbgs() << "LAA: We need to do " << NumComparisons << - " pointer comparisons.\n"); + bool NeedRTCheck; + bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, + NeedRTCheck, SE, + TheLoop, Strides); - // If we only have one set of dependences to check pointers among we don't - // need a runtime check. - if (NumComparisons == 0 && NeedRTCheck) - NeedRTCheck = false; + DEBUG(dbgs() << "LAA: We need to do " + << PtrRtCheck.getNumberOfChecks(nullptr) + << " pointer comparisons.\n"); // Check that we found the bounds for the pointer. if (CanDoRT) @@ -1171,10 +1250,11 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { PtrRtCheck.reset(); PtrRtCheck.Need = true; - CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, + CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE, TheLoop, Strides, true); + // Check that we found the bounds for the pointer. 
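The two distance tests added in this MemoryDepChecker hunk reduce to small integer arithmetic; here they are pulled out with the worked numbers from the patch's own comments (a standalone sketch of ours, not the LLVM code):

```cpp
#include <cassert>
#include <cstdio>

// Accesses are independent when the element-scaled distance never lands on a
// multiple of the stride (mirrors areStridedAccessesIndependent above).
static bool stridedAccessesIndependent(unsigned Distance, unsigned Stride,
                                       unsigned TypeByteSize) {
  if (Distance % TypeByteSize) // not a whole number of elements apart
    return false;
  return (Distance / TypeByteSize) % Stride != 0;
}

int main() {
  // "for (i = 0; i < 1024; i += 4) A[i+2] = A[i] + 1" with 4-byte ints:
  // distance 8 bytes, stride 4 -> scaled distance 2, never hits stride 4.
  assert(stridedAccessesIndependent(/*Distance=*/8, /*Stride=*/4,
                                    /*TypeByteSize=*/4));

  // Minimum distance for a safe vectorized/unrolled version:
  unsigned TypeByteSize = 4, Stride = 2, MinNumIter = 2;
  unsigned MinDistanceNeeded =
      TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
  std::printf("min distance needed: %u bytes\n", MinDistanceNeeded); // 12
  // The patch's example has Distance = 14 >= 12, so it is vectorizable;
  // forcing MinNumIter = 4 would need 28 > 14 and be rejected.
  return 0;
}
```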
- if (!CanDoRT && NumComparisons > 0) { + if (NeedRTCheck && !CanDoRT) { emitAnalysis(LoopAccessReport() << "cannot check memory dependencies at runtime"); DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); @@ -1319,7 +1399,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL), + : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 3c1826a58e66..255bae61eb2f 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -124,11 +124,11 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, AliasAnalysis *AA) { if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { if (LI->isUnordered()) { - Loc = AA->getLocation(LI); + Loc = MemoryLocation::get(LI); return AliasAnalysis::Ref; } if (LI->getOrdering() == Monotonic) { - Loc = AA->getLocation(LI); + Loc = MemoryLocation::get(LI); return AliasAnalysis::ModRef; } Loc = AliasAnalysis::Location(); @@ -137,11 +137,11 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (SI->isUnordered()) { - Loc = AA->getLocation(SI); + Loc = MemoryLocation::get(SI); return AliasAnalysis::Mod; } if (SI->getOrdering() == Monotonic) { - Loc = AA->getLocation(SI); + Loc = MemoryLocation::get(SI); return AliasAnalysis::ModRef; } Loc = AliasAnalysis::Location(); @@ -149,7 +149,7 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, } if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { - Loc = AA->getLocation(V); + Loc = MemoryLocation::get(V); return AliasAnalysis::ModRef; } @@ -486,7 +486,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } } - AliasAnalysis::Location LoadLoc = AA->getLocation(LI); + AliasAnalysis::Location LoadLoc = MemoryLocation::get(LI); // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); @@ -575,7 +575,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Ok, this store might clobber the query pointer. Check to see if it is // a must alias: in this case, we want to return this as a def. - AliasAnalysis::Location StoreLoc = AA->getLocation(SI); + AliasAnalysis::Location StoreLoc = MemoryLocation::get(SI); // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc); @@ -872,7 +872,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { void MemoryDependenceAnalysis:: getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) { - const AliasAnalysis::Location Loc = AA->getLocation(QueryInst); + const AliasAnalysis::Location Loc = MemoryLocation::get(QueryInst); bool isLoad = isa<LoadInst>(QueryInst); BasicBlock *FromBB = QueryInst->getParent(); assert(FromBB); @@ -1278,8 +1278,7 @@ getNonLocalPointerDepFromBB(Instruction *QueryInst, // Get the PHI translated pointer in this predecessor. This can fail if // not translatable, in which case the getAddr() returns null. 
PHITransAddr &PredPointer = PredList.back().second; - PredPointer.PHITranslateValue(BB, Pred, nullptr); - + PredPointer.PHITranslateValue(BB, Pred, DT, /*MustDominate=*/false); Value *PredPtrVal = PredPointer.getAddr(); // Check to see if we have already visited this pred block with another diff --git a/lib/Analysis/MemoryLocation.cpp b/lib/Analysis/MemoryLocation.cpp new file mode 100644 index 000000000000..f87a017b9211 --- /dev/null +++ b/lib/Analysis/MemoryLocation.cpp @@ -0,0 +1,90 @@ +//===- MemoryLocation.cpp - Memory location descriptions -------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +using namespace llvm; + +MemoryLocation MemoryLocation::get(const LoadInst *LI) { + AAMDNodes AATags; + LI->getAAMetadata(AATags); + const auto &DL = LI->getModule()->getDataLayout(); + + return MemoryLocation(LI->getPointerOperand(), + DL.getTypeStoreSize(LI->getType()), AATags); +} + +MemoryLocation MemoryLocation::get(const StoreInst *SI) { + AAMDNodes AATags; + SI->getAAMetadata(AATags); + const auto &DL = SI->getModule()->getDataLayout(); + + return MemoryLocation(SI->getPointerOperand(), + DL.getTypeStoreSize(SI->getValueOperand()->getType()), + AATags); +} + +MemoryLocation MemoryLocation::get(const VAArgInst *VI) { + AAMDNodes AATags; + VI->getAAMetadata(AATags); + + return MemoryLocation(VI->getPointerOperand(), UnknownSize, AATags); +} + +MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) { + AAMDNodes AATags; + CXI->getAAMetadata(AATags); + const auto &DL = CXI->getModule()->getDataLayout(); + + return MemoryLocation( + CXI->getPointerOperand(), + DL.getTypeStoreSize(CXI->getCompareOperand()->getType()), AATags); +} + +MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) { + AAMDNodes AATags; + RMWI->getAAMetadata(AATags); + const auto &DL = RMWI->getModule()->getDataLayout(); + + return MemoryLocation(RMWI->getPointerOperand(), + DL.getTypeStoreSize(RMWI->getValOperand()->getType()), + AATags); +} + +MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) { + uint64_t Size = UnknownSize; + if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) + Size = C->getValue().getZExtValue(); + + // memcpy/memmove can have AA tags. For memcpy, they apply + // to both the source and the destination. + AAMDNodes AATags; + MTI->getAAMetadata(AATags); + + return MemoryLocation(MTI->getRawSource(), Size, AATags); +} + +MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MTI) { + uint64_t Size = UnknownSize; + if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) + Size = C->getValue().getZExtValue(); + + // memcpy/memmove can have AA tags. For memcpy, they apply + // to both the source and the destination. 
+ AAMDNodes AATags; + MTI->getAAMetadata(AATags); + + return MemoryLocation(MTI->getRawDest(), Size, AATags); +} diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 177684fdc9a7..633d6aaad35e 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -150,7 +150,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, if (!Inst) return V; // Determine whether 'Inst' is an input to our PHI translatable expression. - bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst); + bool isInput = + std::find(InstInputs.begin(), InstInputs.end(), Inst) != InstInputs.end(); // Handle inputs instructions if needed. if (isInput) { @@ -276,7 +277,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, isNSW = isNUW = false; // If the old 'LHS' was an input, add the new 'LHS' as an input. - if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) { + if (std::find(InstInputs.begin(), InstInputs.end(), BOp) != + InstInputs.end()) { RemoveInstInputs(BOp, InstInputs); AddAsInput(LHS); } @@ -313,21 +315,26 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, /// PHITranslateValue - PHI translate the current address up the CFG from -/// CurBB to Pred, updating our state to reflect any needed changes. If the -/// dominator tree DT is non-null, the translated value must dominate +/// CurBB to Pred, updating our state to reflect any needed changes. If +/// 'MustDominate' is true, the translated value must dominate /// PredBB. This returns true on failure and sets Addr to null. bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, - const DominatorTree *DT) { + const DominatorTree *DT, + bool MustDominate) { + assert(DT || !MustDominate); assert(Verify() && "Invalid PHITransAddr!"); - Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT); + if (DT && DT->isReachableFromEntry(PredBB)) + Addr = + PHITranslateSubExpr(Addr, CurBB, PredBB, MustDominate ? DT : nullptr); + else + Addr = nullptr; assert(Verify() && "Invalid PHITransAddr!"); - if (DT) { + if (MustDominate) // Make sure the value is live in the predecessor. if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr)) if (!DT->dominates(Inst->getParent(), PredBB)) Addr = nullptr; - } return Addr == nullptr; } @@ -370,7 +377,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, // See if we have a version of this value already available and dominating // PredBB. If so, there is no need to insert a new instance of it. PHITransAddr Tmp(InVal, DL, AC); - if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT)) + if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT, /*MustDominate=*/true)) return Tmp.getAddr(); // If we don't have an available version of this value, it must be an diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 0bd427bf74e9..f82235d0c26e 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1712,7 +1712,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // would confuse the logic below that expects proper IVs. 
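A hedged usage sketch for the `getForSource`/`getForDest` helpers defined in the new MemoryLocation.cpp above: build both locations of a `memcpy`/`memmove` and ask the alias analysis whether they may overlap (the wrapper function name is ours):

```cpp
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

bool mayCopyOverlap(AliasAnalysis &AA, const MemTransferInst *MTI) {
  // For memcpy the AA tags apply to both sides, as the comments above note.
  MemoryLocation Src = MemoryLocation::getForSource(MTI);
  MemoryLocation Dst = MemoryLocation::getForDest(MTI);
  return AA.alias(Src, Dst) != AliasAnalysis::NoAlias;
}
```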
if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) { Phi->replaceAllUsesWith(V); - DeadInsts.push_back(Phi); + DeadInsts.emplace_back(Phi); ++NumElim; DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); @@ -1787,7 +1787,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName); } IsomorphicInc->replaceAllUsesWith(NewInc); - DeadInsts.push_back(IsomorphicInc); + DeadInsts.emplace_back(IsomorphicInc); } } DEBUG_WITH_TYPE(DebugType, dbgs() @@ -1800,13 +1800,30 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); } Phi->replaceAllUsesWith(NewIV); - DeadInsts.push_back(Phi); + DeadInsts.emplace_back(Phi); } return NumElim; } bool SCEVExpander::isHighCostExpansionHelper( const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) { + + // Zero/One operand expressions + switch (S->getSCEVType()) { + case scUnknown: + case scConstant: + return false; + case scTruncate: + return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), L, + Processed); + case scZeroExtend: + return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(), + L, Processed); + case scSignExtend: + return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(), + L, Processed); + } + if (!Processed.insert(S).second) return false; @@ -1849,23 +1866,22 @@ bool SCEVExpander::isHighCostExpansionHelper( } } - // Recurse past add expressions, which commonly occur in the + // HowManyLessThans uses a Max expression whenever the loop is not guarded by + // the exit condition. + if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) + return true; + + // Recurse past nary expressions, which commonly occur in the // BackedgeTakenCount. They may already exist in program code, and if not, // they are not too expensive rematerialize. - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) { + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); I != E; ++I) { if (isHighCostExpansionHelper(*I, L, Processed)) return true; } - return false; } - // HowManyLessThans uses a Max expression whenever the loop is not guarded by - // the exit condition. - if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) - return true; - // If we haven't recognized an expensive SCEV pattern, assume it's an // expression produced by program code. 
return false; diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index e1744d1f2965..24cada3e5313 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -100,9 +100,10 @@ bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const { bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { + int64_t Scale, + unsigned AddrSpace) const { return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale); + Scale, AddrSpace); } bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, @@ -118,9 +119,10 @@ bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { + int64_t Scale, + unsigned AddrSpace) const { return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale); + Scale, AddrSpace); } bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { @@ -235,6 +237,13 @@ TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); } +unsigned TargetTransformInfo::getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, + unsigned Alignment, unsigned AddressSpace) const { + return TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); +} + unsigned TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys) const { diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index a55712c6296a..c4f046340fce 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -2967,38 +2967,25 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, // For GEPs, determine if the indexing lands within the allocated object. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + Type *VTy = GEP->getType(); + Type *Ty = VTy->getPointerElementType(); + const Value *Base = GEP->getPointerOperand(); + // Conservatively require that the base pointer be fully dereferenceable. - if (!Visited.insert(GEP->getOperand(0)).second) + if (!Visited.insert(Base).second) return false; - if (!isDereferenceablePointer(GEP->getOperand(0), DL, CtxI, + if (!isDereferenceablePointer(Base, DL, CtxI, DT, TLI, Visited)) return false; - // Check the indices. - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::const_op_iterator I = GEP->op_begin()+1, - E = GEP->op_end(); I != E; ++I) { - Value *Index = *I; - Type *Ty = *GTI++; - // Struct indices can't be out of bounds. - if (isa<StructType>(Ty)) - continue; - ConstantInt *CI = dyn_cast<ConstantInt>(Index); - if (!CI) - return false; - // Zero is always ok. - if (CI->isZero()) - continue; - // Check to see that it's within the bounds of an array. - ArrayType *ATy = dyn_cast<ArrayType>(Ty); - if (!ATy) - return false; - if (CI->getValue().getActiveBits() > 64) - return false; - if (CI->getZExtValue() >= ATy->getNumElements()) - return false; - } - // Indices check out; this is dereferenceable. - return true; + + APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0); + if (!GEP->accumulateConstantOffset(DL, Offset)) + return false; + + // Check if the load is within the bounds of the underlying object. 
+ uint64_t LoadSize = DL.getTypeStoreSize(Ty); + Type *BaseType = Base->getType()->getPointerElementType(); + return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)); } // For gc.relocate, look through relocations diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 05c24285028f..09fe6c0a0bd8 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -252,7 +252,7 @@ void LLLexer::SkipLineComment() { } } -/// LexAt - Lex all tokens that start with an @ character: +/// Lex all tokens that start with an @ character. /// GlobalVar @\"[^\"]*\" /// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]* /// GlobalVarID @[0-9]+ @@ -375,7 +375,7 @@ lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { return lltok::Error; } -/// LexPercent - Lex all tokens that start with a % character: +/// Lex all tokens that start with a % character. /// LocalVar ::= %\"[^\"]*\" /// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* /// LocalVarID ::= %[0-9]+ @@ -383,7 +383,7 @@ lltok::Kind LLLexer::LexPercent() { return LexVar(lltok::LocalVar, lltok::LocalVarID); } -/// LexQuote - Lex all tokens that start with a " character: +/// Lex all tokens that start with a " character. /// QuoteLabel "[^"]+": /// StringConstant "[^"]*" lltok::Kind LLLexer::LexQuote() { @@ -404,7 +404,7 @@ lltok::Kind LLLexer::LexQuote() { return kind; } -/// LexExclaim: +/// Lex all tokens that start with a ! character. /// !foo /// ! lltok::Kind LLLexer::LexExclaim() { @@ -425,7 +425,7 @@ lltok::Kind LLLexer::LexExclaim() { return lltok::exclaim; } -/// LexHash - Lex all tokens that start with a # character: +/// Lex all tokens that start with a # character. /// AttrGrpID ::= #[0-9]+ lltok::Kind LLLexer::LexHash() { // Handle AttrGrpID: #[0-9]+ @@ -443,7 +443,7 @@ lltok::Kind LLLexer::LexHash() { return lltok::Error; } -/// LexIdentifier: Handle several related productions: +/// Lex a label, integer type, keyword, or hexadecimal integer constant. /// Label [-a-zA-Z$._0-9]+: /// IntegerType i[0-9]+ /// Keyword sdiv, float, ... @@ -800,9 +800,8 @@ lltok::Kind LLLexer::LexIdentifier() { return lltok::Error; } - -/// Lex0x: Handle productions that start with 0x, knowing that it matches and -/// that this is not a label: +/// Lex all tokens that start with a 0x prefix, knowing they match and are not +/// labels. /// HexFPConstant 0x[0-9A-Fa-f]+ /// HexFP80Constant 0xK[0-9A-Fa-f]+ /// HexFP128Constant 0xL[0-9A-Fa-f]+ @@ -860,7 +859,7 @@ lltok::Kind LLLexer::Lex0x() { } } -/// LexIdentifier: Handle several related productions: +/// Lex tokens for a label or a numeric constant, possibly starting with -. /// Label [-a-zA-Z$._0-9]+: /// NInteger -[0-9]+ /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? @@ -938,6 +937,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { return lltok::APFloat; } +/// Lex a floating point constant starting with +. /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? 
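The rewritten GEP case in ValueTracking.cpp above swaps the per-index walk for a single byte-level test: accumulate the GEP's constant offset and check that the loaded range still fits inside the base object's allocation. Reduced to plain arithmetic (illustrative; the real code uses `APInt` and `DataLayout`):

```cpp
#include <cstdint>

bool gepRangeInBounds(uint64_t ConstantOffset,  // accumulateConstantOffset()
                      uint64_t LoadSize,        // getTypeStoreSize(pointee)
                      uint64_t BaseAllocSize) { // getTypeAllocSize(base object)
  // Offset + LoadSize <= BaseAllocSize, written to avoid wraparound.
  return ConstantOffset <= BaseAllocSize &&
         LoadSize <= BaseAllocSize - ConstantOffset;
}
```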
lltok::Kind LLLexer::LexPositive() { // If the letter after the negative is a number, this is probably not a diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index a52e20fb35b0..681af2a90072 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -189,7 +189,7 @@ bool LLParser::ParseTopLevelEntities() { // The Global variable production with no name can have many different // optional leading prefixes, the production is: // GlobalVar ::= OptionalLinkage OptionalVisibility OptionalDLLStorageClass - // OptionalThreadLocal OptionalAddrSpace OptionalUnNammedAddr + // OptionalThreadLocal OptionalAddrSpace OptionalUnnamedAddr // ('constant'|'global') ... case lltok::kw_private: // OptionalLinkage case lltok::kw_internal: // OptionalLinkage @@ -615,12 +615,12 @@ static bool isValidVisibilityForLinkage(unsigned V, unsigned L) { /// ParseAlias: /// ::= GlobalVar '=' OptionalLinkage OptionalVisibility /// OptionalDLLStorageClass OptionalThreadLocal -/// OptionalUnNammedAddr 'alias' Aliasee +/// OptionalUnnamedAddr 'alias' Aliasee /// /// Aliasee /// ::= TypeAndValue /// -/// Everything through OptionalUnNammedAddr has already been parsed. +/// Everything through OptionalUnnamedAddr has already been parsed. /// bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L, unsigned Visibility, unsigned DLLStorageClass, @@ -705,13 +705,13 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L, /// ParseGlobal /// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalDLLStorageClass -/// OptionalThreadLocal OptionalUnNammedAddr OptionalAddrSpace +/// OptionalThreadLocal OptionalUnnamedAddr OptionalAddrSpace /// OptionalExternallyInitialized GlobalType Type Const /// ::= OptionalLinkage OptionalVisibility OptionalDLLStorageClass -/// OptionalThreadLocal OptionalUnNammedAddr OptionalAddrSpace +/// OptionalThreadLocal OptionalUnnamedAddr OptionalAddrSpace /// OptionalExternallyInitialized GlobalType Type Const /// -/// Everything up to and including OptionalUnNammedAddr has been parsed +/// Everything up to and including OptionalUnnamedAddr has been parsed /// already. /// bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, @@ -1902,9 +1902,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, return Error(TypeLoc, "invalid type for function argument"); unsigned AttrIndex = 1; - ArgList.push_back(ArgInfo(TypeLoc, ArgTy, - AttributeSet::get(ArgTy->getContext(), - AttrIndex++, Attrs), Name)); + ArgList.emplace_back(TypeLoc, ArgTy, AttributeSet::get(ArgTy->getContext(), + AttrIndex++, Attrs), + std::move(Name)); while (EatIfPresent(lltok::comma)) { // Handle ... at end of arg list. 
@@ -1930,10 +1930,10 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, if (!ArgTy->isFirstClassType()) return Error(TypeLoc, "invalid type for function argument"); - ArgList.push_back(ArgInfo(TypeLoc, ArgTy, - AttributeSet::get(ArgTy->getContext(), - AttrIndex++, Attrs), - Name)); + ArgList.emplace_back( + TypeLoc, ArgTy, + AttributeSet::get(ArgTy->getContext(), AttrIndex++, Attrs), + std::move(Name)); } } @@ -3730,7 +3730,7 @@ bool LLParser::ParseDILocalVariable(MDNode *&Result, bool IsDistinct) { OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ OPTIONAL(type, MDField, ); \ - OPTIONAL(arg, MDUnsignedField, (0, UINT8_MAX)); \ + OPTIONAL(arg, MDUnsignedField, (0, UINT16_MAX)); \ OPTIONAL(flags, DIFlagField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index f6d5ccc1a59e..056d87beef15 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -63,9 +63,7 @@ public: // vector compatibility methods unsigned size() const { return ValuePtrs.size(); } void resize(unsigned N) { ValuePtrs.resize(N); } - void push_back(Value *V) { - ValuePtrs.push_back(V); - } + void push_back(Value *V) { ValuePtrs.emplace_back(V); } void clear() { assert(ResolveConstants.empty() && "Constants not resolved?"); @@ -1499,6 +1497,8 @@ std::error_code BitcodeReader::ParseTypeTableBody() { case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] if (Record.size() < 2) return Error("Invalid record"); + if (Record[0] == 0) + return Error("Invalid vector length"); ResultTy = getTypeByID(Record[1]); if (!ResultTy || !StructType::isValidElementType(ResultTy)) return Error("Invalid type"); @@ -1636,9 +1636,9 @@ std::error_code BitcodeReader::ParseMetadata() { Record.clear(); Code = Stream.ReadCode(); - // METADATA_NAME is always followed by METADATA_NAMED_NODE. unsigned NextBitCode = Stream.readRecord(Code, Record); - assert(NextBitCode == bitc::METADATA_NAMED_NODE); (void)NextBitCode; + if (NextBitCode != bitc::METADATA_NAMED_NODE) + return Error("METADATA_NAME not followed by METADATA_NAMED_NODE"); // Read named metadata elements. unsigned Size = Record.size(); @@ -2065,10 +2065,13 @@ std::error_code BitcodeReader::ResolveGlobalAndAliasInits() { if (ValID >= ValueList.size()) { AliasInits.push_back(AliasInitWorklist.back()); } else { - if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID])) - AliasInitWorklist.back().first->setAliasee(C); - else + Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]); + if (!C) return Error("Expected a constant"); + GlobalAlias *Alias = AliasInitWorklist.back().first; + if (C->getType() != Alias->getType()) + return Error("Alias and aliasee types don't match"); + Alias->setAliasee(C); } AliasInitWorklist.pop_back(); } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 58b87e12912c..5fe4c4bcaec4 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -163,7 +163,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // all callee-saved registers. In non-return this is any // callee-saved register that is not saved in the prolog. 
const MachineFrameInfo *MFI = MF.getFrameInfo(); - BitVector Pristine = MFI->getPristineRegs(BB); + BitVector Pristine = MFI->getPristineRegs(MF); for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp index 2487aba448cb..8c6838394ac9 100644 --- a/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -38,7 +38,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { Entries[I.second.Number] = I.second.TLS ? Asm.getObjFileLowering().getDebugThreadLocalSymbol(I.first) - : MCSymbolRefExpr::Create(I.first, Asm.OutContext); + : MCSymbolRefExpr::create(I.first, Asm.OutContext); for (const MCExpr *Entry : Entries) Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize()); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 206be702e724..2e3b83a09520 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -14,7 +14,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "DwarfDebug.h" #include "DwarfException.h" -#include "Win64Exception.h" +#include "WinException.h" #include "WinCodeViewLineTables.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" @@ -40,7 +40,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" @@ -268,8 +268,9 @@ bool AsmPrinter::doInitialization(Module &M) { default: llvm_unreachable("unsupported unwinding information encoding"); case WinEH::EncodingType::Invalid: break; + case WinEH::EncodingType::X86: case WinEH::EncodingType::Itanium: - ES = new Win64Exception(this); + ES = new WinException(this); break; } break; @@ -511,7 +512,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 - OutStreamer->EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); + OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym), + MCConstantExpr::create(Size, OutContext)); OutStreamer->AddBlankLine(); } @@ -565,7 +567,7 @@ void AsmPrinter::EmitFunctionHeader() { MCSymbol *CurPos = OutContext.createTempSymbol(); OutStreamer->EmitLabel(CurPos); OutStreamer->EmitAssignment(CurrentFnBegin, - MCSymbolRefExpr::Create(CurPos, OutContext)); + MCSymbolRefExpr::create(CurPos, OutContext)); } else { OutStreamer->EmitLabel(CurrentFnBegin); } @@ -775,7 +777,7 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { // Emit a symbol assignment. OutStreamer->EmitAssignment(FrameAllocSym, - MCConstantExpr::Create(FrameOffset, OutContext)); + MCConstantExpr::create(FrameOffset, OutContext)); } /// EmitFunctionBody - This method emits the body and trailer for a @@ -899,11 +901,11 @@ void AsmPrinter::EmitFunctionBody() { // We can get the size as difference between the function label and the // temp label. 
const MCExpr *SizeExp = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(CurrentFnEnd, OutContext), - MCSymbolRefExpr::Create(CurrentFnSymForSize, + MCBinaryExpr::createSub(MCSymbolRefExpr::create(CurrentFnEnd, OutContext), + MCSymbolRefExpr::create(CurrentFnSymForSize, OutContext), OutContext); - OutStreamer->EmitELFSize(CurrentFnSym, SizeExp); + OutStreamer->emitELFSize(cast<MCSymbolELF>(CurrentFnSym), SizeExp); } for (const HandlerInfo &HI : Handlers) { @@ -1325,9 +1327,9 @@ void AsmPrinter::EmitJumpTableInfo() { // .set LJTSet, LBB32-base const MCExpr *LHS = - MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); + MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); OutStreamer->EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), - MCBinaryExpr::CreateSub(LHS, Base, + MCBinaryExpr::createSub(LHS, Base, OutContext)); } } @@ -1367,14 +1369,14 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, case MachineJumpTableInfo::EK_BlockAddress: // EK_BlockAddress - Each entry is a plain address of block, e.g.: // .word LBB123 - Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); + Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); break; case MachineJumpTableInfo::EK_GPRel32BlockAddress: { // EK_GPRel32BlockAddress - Each entry is an address of block, encoded // with a relocation as gp-relative, e.g.: // .gprel32 LBB123 MCSymbol *MBBSym = MBB->getSymbol(); - OutStreamer->EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + OutStreamer->EmitGPRel32Value(MCSymbolRefExpr::create(MBBSym, OutContext)); return; } @@ -1383,7 +1385,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // with a relocation as gp-relative, e.g.: // .gpdword LBB123 MCSymbol *MBBSym = MBB->getSymbol(); - OutStreamer->EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + OutStreamer->EmitGPRel64Value(MCSymbolRefExpr::create(MBBSym, OutContext)); return; } @@ -1396,14 +1398,14 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // .set L4_5_set_123, LBB123 - LJTI1_2 // .word L4_5_set_123 if (MAI->doesSetDirectiveSuppressesReloc()) { - Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()), + Value = MCSymbolRefExpr::create(GetJTSetSymbol(UID, MBB->getNumber()), OutContext); break; } - Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); + Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext); - Value = MCBinaryExpr::CreateSub(Value, Base, OutContext); + Value = MCBinaryExpr::createSub(Value, Base, OutContext); break; } } @@ -1595,8 +1597,8 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, // Get the Hi-Lo expression. 
const MCExpr *Diff = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext), - MCSymbolRefExpr::Create(Lo, OutContext), + MCBinaryExpr::createSub(MCSymbolRefExpr::create(Hi, OutContext), + MCSymbolRefExpr::create(Lo, OutContext), OutContext); if (!MAI->doesSetDirectiveSuppressesReloc()) { @@ -1622,10 +1624,10 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, } // Emit Label+Offset (or just Label if Offset is zero) - const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext); + const MCExpr *Expr = MCSymbolRefExpr::create(Label, OutContext); if (Offset) - Expr = MCBinaryExpr::CreateAdd( - Expr, MCConstantExpr::Create(Offset, OutContext), OutContext); + Expr = MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(Offset, OutContext), OutContext); OutStreamer->EmitValue(Expr, Size); } @@ -1662,16 +1664,16 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { MCContext &Ctx = OutContext; if (CV->isNullValue() || isa<UndefValue>(CV)) - return MCConstantExpr::Create(0, Ctx); + return MCConstantExpr::create(0, Ctx); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) - return MCConstantExpr::Create(CI->getZExtValue(), Ctx); + return MCConstantExpr::create(CI->getZExtValue(), Ctx); if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) - return MCSymbolRefExpr::Create(getSymbol(GV), Ctx); + return MCSymbolRefExpr::create(getSymbol(GV), Ctx); if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) - return MCSymbolRefExpr::Create(GetBlockAddressSymbol(BA), Ctx); + return MCSymbolRefExpr::create(GetBlockAddressSymbol(BA), Ctx); const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); if (!CE) { @@ -1712,7 +1714,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { return Base; int64_t Offset = OffsetAI.getSExtValue(); - return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), + return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx), Ctx); } @@ -1755,8 +1757,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // the high bits so we are sure to get a proper truncation if the input is // a constant expr. 
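// A quick check of the mask arithmetic in the lines that follow: for
// 0 < InBits <= 64, ~0ULL >> (64 - InBits) yields InBits low one-bits, so
// ANDing with it truncates to the allocation width. Standalone example with
// an assumed 16-bit width:
#include <cassert>
#include <cstdint>
int main() {
  unsigned InBits = 16;
  uint64_t Mask = ~0ULL >> (64 - InBits);  // 0xFFFF: 16 low bits set
  assert((0x12345678u & Mask) == 0x5678u); // high bits dropped
  return 0;
}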
unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); - const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); - return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); + const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx); + return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); } // The MC library also has a right-shift operator, but it isn't consistently @@ -1774,15 +1776,15 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { const MCExpr *RHS = lowerConstant(CE->getOperand(1)); switch (CE->getOpcode()) { default: llvm_unreachable("Unknown binary operator constant cast expr"); - case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); - case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); - case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); - case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); - case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); - case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); - case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); - case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); - case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); + case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); + case Instruction::Sub: return MCBinaryExpr::createSub(LHS, RHS, Ctx); + case Instruction::Mul: return MCBinaryExpr::createMul(LHS, RHS, Ctx); + case Instruction::SDiv: return MCBinaryExpr::createDiv(LHS, RHS, Ctx); + case Instruction::SRem: return MCBinaryExpr::createMod(LHS, RHS, Ctx); + case Instruction::Shl: return MCBinaryExpr::createShl(LHS, RHS, Ctx); + case Instruction::And: return MCBinaryExpr::createAnd(LHS, RHS, Ctx); + case Instruction::Or: return MCBinaryExpr::createOr (LHS, RHS, Ctx); + case Instruction::Xor: return MCBinaryExpr::createXor(LHS, RHS, Ctx); } } } @@ -2106,13 +2108,13 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, // cstexpr := <gotequiv> - "." + <cst> // cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst> // - // After canonicalization by EvaluateAsRelocatable `ME` turns into: + // After canonicalization by evaluateAsRelocatable `ME` turns into: // // cstexpr := <gotequiv> - <foo> + gotpcrelcst, where // gotpcrelcst := <offset from @foo base> + <cst> // MCValue MV; - if (!(*ME)->EvaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute()) + if (!(*ME)->evaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute()) return; const MCSymbol *GOTEquivSym = &MV.getSymA()->getSymbol(); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 3258961bfb11..7dbfddf60691 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -254,40 +254,34 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { } void AsmPrinter::emitDwarfDIE(const DIE &Die) const { - // Get the abbreviation for this DIE. - const DIEAbbrev &Abbrev = Die.getAbbrev(); - // Emit the code (index) for the abbreviation. 
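// This emitDwarfDIE rewrite is possible because each DIEValue now stores its
// attribute and form next to the payload, so emission no longer walks a
// parallel DIEAbbrevData array. Sketch of the new iteration idiom; hasForm
// is a hypothetical consumer, not from the patch:
#include "llvm/CodeGen/DIE.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
static bool hasForm(const DIE &Die, dwarf::Form F) {
  for (const DIEValue &V : Die.values())
    if (V.getForm() == F)
      return true;
  return false;
}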
if (isVerbose()) - OutStreamer->AddComment("Abbrev [" + Twine(Abbrev.getNumber()) + - "] 0x" + Twine::utohexstr(Die.getOffset()) + - ":0x" + Twine::utohexstr(Die.getSize()) + " " + - dwarf::TagString(Abbrev.getTag())); - EmitULEB128(Abbrev.getNumber()); - - const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + OutStreamer->AddComment("Abbrev [" + Twine(Die.getAbbrevNumber()) + "] 0x" + + Twine::utohexstr(Die.getOffset()) + ":0x" + + Twine::utohexstr(Die.getSize()) + " " + + dwarf::TagString(Die.getTag())); + EmitULEB128(Die.getAbbrevNumber()); // Emit the DIE attribute values. - for (unsigned i = 0, N = Values.size(); i < N; ++i) { - dwarf::Attribute Attr = AbbrevData[i].getAttribute(); - dwarf::Form Form = AbbrevData[i].getForm(); + for (const auto &V : Die.values()) { + dwarf::Attribute Attr = V.getAttribute(); + dwarf::Form Form = V.getForm(); assert(Form && "Too many attributes for DIE (check abbreviation)"); if (isVerbose()) { OutStreamer->AddComment(dwarf::AttributeString(Attr)); if (Attr == dwarf::DW_AT_accessibility) - OutStreamer->AddComment(dwarf::AccessibilityString( - cast<DIEInteger>(Values[i])->getValue())); + OutStreamer->AddComment( + dwarf::AccessibilityString(V.getDIEInteger().getValue())); } // Emit an attribute using the defined form. - Values[i]->EmitValue(this, Form); + V.EmitValue(this, Form); } // Emit the DIE children if any. - if (Abbrev.hasChildren()) { - for (auto &Child : Die.getChildren()) + if (Die.hasChildren()) { + for (auto &Child : Die.children()) emitDwarfDIE(*Child); OutStreamer->AddComment("End Of Children Mark"); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index e7631dd51521..793e62960dd6 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -402,10 +402,11 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, unsigned OpFlags = MI->getOperand(OpNo).getImm(); ++OpNo; // Skip over the ID number. - if (Modifier[0] == 'l') // labels are target independent + if (Modifier[0] == 'l') { // Labels are target independent. // FIXME: What if the operand isn't an MBB, report error? - OS << *MI->getOperand(OpNo).getMBB()->getSymbol(); - else { + const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); + Sym->print(OS, AP->MAI); + } else { if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, Modifier[0] ? Modifier : nullptr, diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 01d2c7220ab9..f2da8557a522 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -18,7 +18,7 @@ add_llvm_library(LLVMAsmPrinter EHStreamer.cpp ErlangGCPrinter.cpp OcamlGCPrinter.cpp - Win64Exception.cpp + WinException.cpp WinCodeViewLineTables.cpp ) diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 1ccffe97b80b..fa8449e94c9f 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -107,6 +107,13 @@ void DIEAbbrev::print(raw_ostream &O) { void DIEAbbrev::dump() { print(dbgs()); } #endif +DIEAbbrev DIE::generateAbbrev() const { + DIEAbbrev Abbrev(Tag, hasChildren()); + for (const DIEValue &V : Values) + Abbrev.AddAttribute(V.getAttribute(), V.getForm()); + return Abbrev; +} + /// Climb up the parent chain to get the unit DIE to which this DIE /// belongs. 
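// The inline-asm hunk above replaces streaming a symbol with operator<< by
// an explicit MCSymbol::print(OS, MAI), letting the target's MCAsmInfo apply
// its quoting rules to the name. Minimal hedged sketch; printLabel is a
// hypothetical helper:
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static void printLabel(raw_ostream &OS, const MCSymbol &Sym,
                       const MCAsmInfo *MAI) {
  Sym.print(OS, MAI); // quotes the name when the dialect requires it
}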
const DIE *DIE::getUnit() const { @@ -128,22 +135,19 @@ const DIE *DIE::getUnitOrNull() const { return nullptr; } -DIEValue *DIE::findAttribute(dwarf::Attribute Attribute) const { - const SmallVectorImpl<DIEValue *> &Values = getValues(); - const DIEAbbrev &Abbrevs = getAbbrev(); - +DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const { // Iterate through all the attributes until we find the one we're // looking for, if we can't find it return NULL. - for (size_t i = 0; i < Values.size(); ++i) - if (Abbrevs.getData()[i].getAttribute() == Attribute) - return Values[i]; - return nullptr; + for (const auto &V : values()) + if (V.getAttribute() == Attribute) + return V; + return DIEValue(); } #ifndef NDEBUG void DIE::print(raw_ostream &O, unsigned IndentCount) const { const std::string Indent(IndentCount, ' '); - bool isBlock = Abbrev.getTag() == 0; + bool isBlock = getTag() == 0; if (!isBlock) { O << Indent @@ -153,28 +157,26 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const { << ", Size: " << Size << "\n"; O << Indent - << dwarf::TagString(Abbrev.getTag()) + << dwarf::TagString(getTag()) << " " - << dwarf::ChildrenString(Abbrev.hasChildren()) << "\n"; + << dwarf::ChildrenString(hasChildren()) << "\n"; } else { O << "Size: " << Size << "\n"; } - const SmallVectorImpl<DIEAbbrevData> &Data = Abbrev.getData(); - IndentCount += 2; - for (unsigned i = 0, N = Data.size(); i < N; ++i) { + for (unsigned i = 0, N = Values.size(); i < N; ++i) { O << Indent; if (!isBlock) - O << dwarf::AttributeString(Data[i].getAttribute()); + O << dwarf::AttributeString(Values[i].getAttribute()); else O << "Blk[" << i << "]"; O << " " - << dwarf::FormEncodingString(Data[i].getForm()) + << dwarf::FormEncodingString(Values[i].getForm()) << " "; - Values[i]->print(O); + Values[i].print(O); O << "\n"; } IndentCount -= 2; @@ -193,40 +195,24 @@ void DIE::dump() { void DIEValue::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { switch (Ty) { -#define EMIT_VALUE_IMPL(Kind) \ - case is##Kind: \ - cast<DIE##Kind>(this)->EmitValueImpl(AP, Form); \ + case isNone: + llvm_unreachable("Expected valid DIEValue"); +#define HANDLE_DIEVALUE(T) \ + case is##T: \ + getDIE##T().EmitValue(AP, Form); \ break; - EMIT_VALUE_IMPL(Integer) - EMIT_VALUE_IMPL(String) - EMIT_VALUE_IMPL(Expr) - EMIT_VALUE_IMPL(Label) - EMIT_VALUE_IMPL(Delta) - EMIT_VALUE_IMPL(Entry) - EMIT_VALUE_IMPL(TypeSignature) - EMIT_VALUE_IMPL(Block) - EMIT_VALUE_IMPL(Loc) - EMIT_VALUE_IMPL(LocList) -#undef EMIT_VALUE_IMPL +#include "llvm/CodeGen/DIEValue.def" } } unsigned DIEValue::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { switch (Ty) { -#define SIZE_OF_IMPL(Kind) \ - case is##Kind: \ - return cast<DIE##Kind>(this)->SizeOfImpl(AP, Form); - SIZE_OF_IMPL(Integer) - SIZE_OF_IMPL(String) - SIZE_OF_IMPL(Expr) - SIZE_OF_IMPL(Label) - SIZE_OF_IMPL(Delta) - SIZE_OF_IMPL(Entry) - SIZE_OF_IMPL(TypeSignature) - SIZE_OF_IMPL(Block) - SIZE_OF_IMPL(Loc) - SIZE_OF_IMPL(LocList) -#undef SIZE_OF_IMPL + case isNone: + llvm_unreachable("Expected valid DIEValue"); +#define HANDLE_DIEVALUE(T) \ + case is##T: \ + return getDIE##T().SizeOf(AP, Form); +#include "llvm/CodeGen/DIEValue.def" } llvm_unreachable("Unknown DIE kind"); } @@ -234,21 +220,13 @@ unsigned DIEValue::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { #ifndef NDEBUG void DIEValue::print(raw_ostream &O) const { switch (Ty) { -#define PRINT_IMPL(Kind) \ - case is##Kind: \ - cast<DIE##Kind>(this)->printImpl(O); \ + case isNone: + llvm_unreachable("Expected valid DIEValue"); +#define 
HANDLE_DIEVALUE(T) \ + case is##T: \ + getDIE##T().print(O); \ break; - PRINT_IMPL(Integer) - PRINT_IMPL(String) - PRINT_IMPL(Expr) - PRINT_IMPL(Label) - PRINT_IMPL(Delta) - PRINT_IMPL(Entry) - PRINT_IMPL(TypeSignature) - PRINT_IMPL(Block) - PRINT_IMPL(Loc) - PRINT_IMPL(LocList) -#undef PRINT_IMPL +#include "llvm/CodeGen/DIEValue.def" } } @@ -263,7 +241,7 @@ void DIEValue::dump() const { /// EmitValue - Emit integer of appropriate size. /// -void DIEInteger::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { +void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { unsigned Size = ~0U; switch (Form) { case dwarf::DW_FORM_flag_present: @@ -299,7 +277,7 @@ void DIEInteger::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { /// SizeOf - Determine size of integer value in bytes. /// -unsigned DIEInteger::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_flag_present: return 0; case dwarf::DW_FORM_flag: // Fall thru @@ -328,7 +306,7 @@ unsigned DIEInteger::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIEInteger::printImpl(raw_ostream &O) const { +void DIEInteger::print(raw_ostream &O) const { O << "Int: " << (int64_t)Integer << " 0x"; O.write_hex(Integer); } @@ -340,13 +318,13 @@ void DIEInteger::printImpl(raw_ostream &O) const { /// EmitValue - Emit expression value. /// -void DIEExpr::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { +void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form)); } /// SizeOf - Determine size of expression value in bytes. /// -unsigned DIEExpr::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -354,7 +332,7 @@ unsigned DIEExpr::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIEExpr::printImpl(raw_ostream &O) const { O << "Expr: " << *Expr; } +void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; } #endif //===----------------------------------------------------------------------===// @@ -363,7 +341,7 @@ void DIEExpr::printImpl(raw_ostream &O) const { O << "Expr: " << *Expr; } /// EmitValue - Emit label value. /// -void DIELabel::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { +void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelReference(Label, SizeOf(AP, Form), Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_sec_offset || @@ -372,7 +350,7 @@ void DIELabel::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { /// SizeOf - Determine size of label value in bytes. 
/// -unsigned DIELabel::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -380,9 +358,7 @@ unsigned DIELabel::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIELabel::printImpl(raw_ostream &O) const { - O << "Lbl: " << Label->getName(); -} +void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); } #endif //===----------------------------------------------------------------------===// @@ -391,13 +367,13 @@ void DIELabel::printImpl(raw_ostream &O) const { /// EmitValue - Emit delta value. /// -void DIEDelta::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { +void DIEDelta::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); } /// SizeOf - Determine size of delta value in bytes. /// -unsigned DIEDelta::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -405,7 +381,7 @@ unsigned DIEDelta::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIEDelta::printImpl(raw_ostream &O) const { +void DIEDelta::print(raw_ostream &O) const { O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName(); } #endif @@ -416,7 +392,7 @@ void DIEDelta::printImpl(raw_ostream &O) const { /// EmitValue - Emit string value. /// -void DIEString::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { +void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { assert( (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) && "Expected valid string form"); @@ -440,7 +416,7 @@ void DIEString::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { /// SizeOf - Determine size of delta value in bytes. /// -unsigned DIEString::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { assert( (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) && "Expected valid string form"); @@ -458,7 +434,7 @@ unsigned DIEString::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIEString::printImpl(raw_ostream &O) const { +void DIEString::print(raw_ostream &O) const { O << "String: " << S.getString(); } #endif @@ -469,16 +445,16 @@ void DIEString::printImpl(raw_ostream &O) const { /// EmitValue - Emit debug information entry offset. /// -void DIEEntry::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { +void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_ref_addr) { const DwarfDebug *DD = AP->getDwarfDebug(); - unsigned Addr = Entry.getOffset(); + unsigned Addr = Entry->getOffset(); assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations."); // For DW_FORM_ref_addr, output the offset from beginning of debug info // section. Entry->getOffset() returns the offset from start of the // compile unit. 
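// On this DW_FORM_ref_addr path the reference is an offset into .debug_info,
// and its width is version-dependent. A sketch of the rule behind
// DIEEntry::getRefAddrSize; 64-bit DWARF, which would use 8 bytes, is
// deliberately not modeled here:
static unsigned refAddrSize(unsigned DwarfVersion, unsigned PointerSize) {
  // DWARF 2 reused the target pointer size; DWARF 3+ fixed it at 4 bytes.
  return DwarfVersion == 2 ? PointerSize : 4;
}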
- DwarfCompileUnit *CU = DD->lookupUnit(Entry.getUnit()); + DwarfCompileUnit *CU = DD->lookupUnit(Entry->getUnit()); assert(CU && "CUDie should belong to a CU."); Addr += CU->getDebugInfoOffset(); if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) @@ -487,7 +463,7 @@ void DIEEntry::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { else AP->OutStreamer->EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP)); } else - AP->EmitInt32(Entry.getOffset()); + AP->EmitInt32(Entry->getOffset()); } unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) { @@ -503,7 +479,7 @@ unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) { } #ifndef NDEBUG -void DIEEntry::printImpl(raw_ostream &O) const { +void DIEEntry::print(raw_ostream &O) const { O << format("Die: 0x%lx", (long)(intptr_t)&Entry); } #endif @@ -511,14 +487,15 @@ void DIEEntry::printImpl(raw_ostream &O) const { //===----------------------------------------------------------------------===// // DIETypeSignature Implementation //===----------------------------------------------------------------------===// -void DIETypeSignature::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { +void DIETypeSignature::EmitValue(const AsmPrinter *Asm, + dwarf::Form Form) const { assert(Form == dwarf::DW_FORM_ref_sig8); - Asm->OutStreamer->EmitIntValue(Unit.getTypeSignature(), 8); + Asm->OutStreamer->EmitIntValue(Unit->getTypeSignature(), 8); } #ifndef NDEBUG -void DIETypeSignature::printImpl(raw_ostream &O) const { - O << format("Type Unit: 0x%lx", Unit.getTypeSignature()); +void DIETypeSignature::print(raw_ostream &O) const { + O << format("Type Unit: 0x%lx", Unit->getTypeSignature()); } #endif @@ -530,9 +507,8 @@ void DIETypeSignature::printImpl(raw_ostream &O) const { /// unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const { if (!Size) { - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) - Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm()); + Size += Values[i].SizeOf(AP, Values[i].getForm()); } return Size; @@ -540,7 +516,7 @@ unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const { /// EmitValue - Emit location data. /// -void DIELoc::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { +void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; @@ -551,14 +527,13 @@ void DIELoc::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { Asm->EmitULEB128(Size); break; } - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) - Values[i]->EmitValue(Asm, AbbrevData[i].getForm()); + Values[i].EmitValue(Asm, Values[i].getForm()); } /// SizeOf - Determine size of location data in bytes. 
/// -unsigned DIELoc::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); @@ -571,7 +546,7 @@ unsigned DIELoc::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIELoc::printImpl(raw_ostream &O) const { +void DIELoc::print(raw_ostream &O) const { O << "ExprLoc: "; DIE::print(O, 5); } @@ -585,9 +560,8 @@ void DIELoc::printImpl(raw_ostream &O) const { /// unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const { if (!Size) { - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) - Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm()); + Size += Values[i].SizeOf(AP, Values[i].getForm()); } return Size; @@ -595,7 +569,7 @@ unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const { /// EmitValue - Emit block data. /// -void DIEBlock::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { +void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; @@ -604,14 +578,13 @@ void DIEBlock::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break; } - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) - Values[i]->EmitValue(Asm, AbbrevData[i].getForm()); + Values[i].EmitValue(Asm, Values[i].getForm()); } /// SizeOf - Determine size of block data in bytes. /// -unsigned DIEBlock::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); @@ -622,7 +595,7 @@ unsigned DIEBlock::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIEBlock::printImpl(raw_ostream &O) const { +void DIEBlock::print(raw_ostream &O) const { O << "Blk: "; DIE::print(O, 5); } @@ -632,7 +605,7 @@ void DIEBlock::printImpl(raw_ostream &O) const { // DIELocList Implementation //===----------------------------------------------------------------------===// -unsigned DIELocList::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) @@ -642,7 +615,7 @@ unsigned DIELocList::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { /// EmitValue - Emit label value. 
/// -void DIELocList::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { +void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { DwarfDebug *DD = AP->getDwarfDebug(); MCSymbol *Label = DD->getDebugLocs().getList(Index).Label; @@ -653,8 +626,5 @@ void DIELocList::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIELocList::printImpl(raw_ostream &O) const { - O << "LocList: " << Index; - -} +void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; } #endif diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp index a2e5aad96570..1445254e6c28 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -31,19 +31,12 @@ using namespace llvm; /// \brief Grabs the string in whichever attribute is passed in and returns /// a reference to it. static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) { - const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); - const DIEAbbrev &Abbrevs = Die.getAbbrev(); - // Iterate through all the attributes until we find the one we're // looking for, if we can't find it return an empty string. - for (size_t i = 0; i < Values.size(); ++i) { - if (Abbrevs.getData()[i].getAttribute() == Attr) { - DIEValue *V = Values[i]; - assert(isa<DIEString>(V) && "String requested. Not a string."); - DIEString *S = cast<DIEString>(V); - return S->getString(); - } - } + for (const auto &V : Die.values()) + if (V.getAttribute() == Attr) + return V.getDIEString().getString(); + return StringRef(""); } @@ -123,20 +116,16 @@ void DIEHash::addParentContext(const DIE &Parent) { // Collect all of the attributes for a particular DIE in single structure. void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { - const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); - const DIEAbbrev &Abbrevs = Die.getAbbrev(); - #define COLLECT_ATTR(NAME) \ case dwarf::NAME: \ - Attrs.NAME.Val = Values[i]; \ - Attrs.NAME.Desc = &Abbrevs.getData()[i]; \ + Attrs.NAME = V; \ break - for (size_t i = 0, e = Values.size(); i != e; ++i) { + for (const auto &V : Die.values()) { DEBUG(dbgs() << "Attribute: " - << dwarf::AttributeString(Abbrevs.getData()[i].getAttribute()) + << dwarf::AttributeString(V.getAttribute()) << " added.\n"); - switch (Abbrevs.getData()[i].getAttribute()) { + switch (V.getAttribute()) { COLLECT_ATTR(DW_AT_name); COLLECT_ATTR(DW_AT_accessibility); COLLECT_ATTR(DW_AT_address_class); @@ -274,11 +263,9 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, // Hash all of the values in a block like set of values. This assumes that // all of the data is going to be added as integers. -void DIEHash::hashBlockData(const SmallVectorImpl<DIEValue *> &Values) { - for (SmallVectorImpl<DIEValue *>::const_iterator I = Values.begin(), - E = Values.end(); - I != E; ++I) - Hash.update((uint64_t)cast<DIEInteger>(*I)->getValue()); +void DIEHash::hashBlockData(const DIE::value_range &Values) { + for (const auto &V : Values) + Hash.update((uint64_t)V.getDIEInteger().getValue()); } // Hash the contents of a loclistptr class. @@ -292,10 +279,8 @@ void DIEHash::hashLocList(const DIELocList &LocList) { // Hash an individual attribute \param Attr based on the type of attribute and // the form. 
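// The rewritten hashAttribute below receives the DIEValue directly. Under
// the DWARF 4 type-signature rules it implements, a hashed attribute
// contributes the marker 'A', the attribute code, a normalized form, and the
// value. Toy sketch of the integer case; the stand-in helpers merely record
// values and do not perform real LEB128 encoding:
#include "llvm/Support/Dwarf.h"
#include <cstdint>
#include <vector>
using namespace llvm;
static std::vector<uint64_t> Recorded;
static void addULEB128(uint64_t V) { Recorded.push_back(V); }
static void addSLEB128(int64_t V) { Recorded.push_back((uint64_t)V); }
static void hashIntegerAttr(int64_t Value) {
  addULEB128('A');                    // attribute marker
  addULEB128(dwarf::DW_AT_byte_size); // example attribute
  addULEB128(dwarf::DW_FORM_sdata);   // data forms normalize to sdata
  addSLEB128(Value);
}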
-void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { - const DIEValue *Value = Attr.Val; - const DIEAbbrevData *Desc = Attr.Desc; - dwarf::Attribute Attribute = Desc->getAttribute(); +void DIEHash::hashAttribute(DIEValue Value, dwarf::Tag Tag) { + dwarf::Attribute Attribute = Value.getAttribute(); // Other attribute values use the letter 'A' as the marker, and the value // consists of the form code (encoded as an unsigned LEB128 value) followed by @@ -304,17 +289,20 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { // computation is limited to the following: DW_FORM_sdata, DW_FORM_flag, // DW_FORM_string, and DW_FORM_block. - switch (Value->getType()) { + switch (Value.getType()) { + case DIEValue::isNone: + llvm_unreachable("Expected valid DIEValue"); + // 7.27 Step 3 // ... An attribute that refers to another type entry T is processed as // follows: case DIEValue::isEntry: - hashDIEEntry(Attribute, Tag, cast<DIEEntry>(Value)->getEntry()); + hashDIEEntry(Attribute, Tag, Value.getDIEEntry().getEntry()); break; case DIEValue::isInteger: { addULEB128('A'); addULEB128(Attribute); - switch (Desc->getForm()) { + switch (Value.getForm()) { case dwarf::DW_FORM_data1: case dwarf::DW_FORM_data2: case dwarf::DW_FORM_data4: @@ -322,14 +310,14 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { case dwarf::DW_FORM_udata: case dwarf::DW_FORM_sdata: addULEB128(dwarf::DW_FORM_sdata); - addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue()); + addSLEB128((int64_t)Value.getDIEInteger().getValue()); break; // DW_FORM_flag_present is just flag with a value of one. We still give it a // value so just use the value. case dwarf::DW_FORM_flag_present: case dwarf::DW_FORM_flag: addULEB128(dwarf::DW_FORM_flag); - addULEB128((int64_t)cast<DIEInteger>(Value)->getValue()); + addULEB128((int64_t)Value.getDIEInteger().getValue()); break; default: llvm_unreachable("Unknown integer form!"); @@ -340,7 +328,7 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { addULEB128('A'); addULEB128(Attribute); addULEB128(dwarf::DW_FORM_string); - addString(cast<DIEString>(Value)->getString()); + addString(Value.getDIEString().getString()); break; case DIEValue::isBlock: case DIEValue::isLoc: @@ -348,17 +336,17 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { addULEB128('A'); addULEB128(Attribute); addULEB128(dwarf::DW_FORM_block); - if (isa<DIEBlock>(Value)) { - addULEB128(cast<DIEBlock>(Value)->ComputeSize(AP)); - hashBlockData(cast<DIEBlock>(Value)->getValues()); - } else if (isa<DIELoc>(Value)) { - addULEB128(cast<DIELoc>(Value)->ComputeSize(AP)); - hashBlockData(cast<DIELoc>(Value)->getValues()); + if (Value.getType() == DIEValue::isBlock) { + addULEB128(Value.getDIEBlock().ComputeSize(AP)); + hashBlockData(Value.getDIEBlock().values()); + } else if (Value.getType() == DIEValue::isLoc) { + addULEB128(Value.getDIELoc().ComputeSize(AP)); + hashBlockData(Value.getDIELoc().values()); } else { // We could add the block length, but that would take // a bit of work and not add a lot of uniqueness // to the hash in some way we could test. - hashLocList(*cast<DIELocList>(Value)); + hashLocList(Value.getDIELocList()); } break; // FIXME: It's uncertain whether or not we should handle this at the moment. 
@@ -375,7 +363,7 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) { #define ADD_ATTR(ATTR) \ { \ - if (ATTR.Val != 0) \ + if (ATTR) \ hashAttribute(ATTR, Tag); \ } @@ -463,7 +451,7 @@ void DIEHash::computeHash(const DIE &Die) { addAttributes(Die); // Then hash each of the children of the DIE. - for (auto &C : Die.getChildren()) { + for (auto &C : Die.children()) { // 7.27 Step 7 // If C is a nested type entry or a member function entry, ... if (isType(C->getTag()) || C->getTag() == dwarf::DW_TAG_subprogram) { diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h index ac014b727b75..1850e042f924 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/lib/CodeGen/AsmPrinter/DIEHash.h @@ -26,64 +26,57 @@ class CompileUnit; /// \brief An object containing the capability of hashing and adding hash /// attributes onto a DIE. class DIEHash { - - // The entry for a particular attribute. - struct AttrEntry { - const DIEValue *Val; - const DIEAbbrevData *Desc; - }; - // Collection of all attributes used in hashing a particular DIE. struct DIEAttrs { - AttrEntry DW_AT_name; - AttrEntry DW_AT_accessibility; - AttrEntry DW_AT_address_class; - AttrEntry DW_AT_allocated; - AttrEntry DW_AT_artificial; - AttrEntry DW_AT_associated; - AttrEntry DW_AT_binary_scale; - AttrEntry DW_AT_bit_offset; - AttrEntry DW_AT_bit_size; - AttrEntry DW_AT_bit_stride; - AttrEntry DW_AT_byte_size; - AttrEntry DW_AT_byte_stride; - AttrEntry DW_AT_const_expr; - AttrEntry DW_AT_const_value; - AttrEntry DW_AT_containing_type; - AttrEntry DW_AT_count; - AttrEntry DW_AT_data_bit_offset; - AttrEntry DW_AT_data_location; - AttrEntry DW_AT_data_member_location; - AttrEntry DW_AT_decimal_scale; - AttrEntry DW_AT_decimal_sign; - AttrEntry DW_AT_default_value; - AttrEntry DW_AT_digit_count; - AttrEntry DW_AT_discr; - AttrEntry DW_AT_discr_list; - AttrEntry DW_AT_discr_value; - AttrEntry DW_AT_encoding; - AttrEntry DW_AT_enum_class; - AttrEntry DW_AT_endianity; - AttrEntry DW_AT_explicit; - AttrEntry DW_AT_is_optional; - AttrEntry DW_AT_location; - AttrEntry DW_AT_lower_bound; - AttrEntry DW_AT_mutable; - AttrEntry DW_AT_ordering; - AttrEntry DW_AT_picture_string; - AttrEntry DW_AT_prototyped; - AttrEntry DW_AT_small; - AttrEntry DW_AT_segment; - AttrEntry DW_AT_string_length; - AttrEntry DW_AT_threads_scaled; - AttrEntry DW_AT_upper_bound; - AttrEntry DW_AT_use_location; - AttrEntry DW_AT_use_UTF8; - AttrEntry DW_AT_variable_parameter; - AttrEntry DW_AT_virtuality; - AttrEntry DW_AT_visibility; - AttrEntry DW_AT_vtable_elem_location; - AttrEntry DW_AT_type; + DIEValue DW_AT_name; + DIEValue DW_AT_accessibility; + DIEValue DW_AT_address_class; + DIEValue DW_AT_allocated; + DIEValue DW_AT_artificial; + DIEValue DW_AT_associated; + DIEValue DW_AT_binary_scale; + DIEValue DW_AT_bit_offset; + DIEValue DW_AT_bit_size; + DIEValue DW_AT_bit_stride; + DIEValue DW_AT_byte_size; + DIEValue DW_AT_byte_stride; + DIEValue DW_AT_const_expr; + DIEValue DW_AT_const_value; + DIEValue DW_AT_containing_type; + DIEValue DW_AT_count; + DIEValue DW_AT_data_bit_offset; + DIEValue DW_AT_data_location; + DIEValue DW_AT_data_member_location; + DIEValue DW_AT_decimal_scale; + DIEValue DW_AT_decimal_sign; + DIEValue DW_AT_default_value; + DIEValue DW_AT_digit_count; + DIEValue DW_AT_discr; + DIEValue DW_AT_discr_list; + DIEValue DW_AT_discr_value; + DIEValue DW_AT_encoding; + DIEValue DW_AT_enum_class; + DIEValue DW_AT_endianity; + DIEValue DW_AT_explicit; + 
DIEValue DW_AT_is_optional; + DIEValue DW_AT_location; + DIEValue DW_AT_lower_bound; + DIEValue DW_AT_mutable; + DIEValue DW_AT_ordering; + DIEValue DW_AT_picture_string; + DIEValue DW_AT_prototyped; + DIEValue DW_AT_small; + DIEValue DW_AT_segment; + DIEValue DW_AT_string_length; + DIEValue DW_AT_threads_scaled; + DIEValue DW_AT_upper_bound; + DIEValue DW_AT_use_location; + DIEValue DW_AT_use_UTF8; + DIEValue DW_AT_variable_parameter; + DIEValue DW_AT_virtuality; + DIEValue DW_AT_visibility; + DIEValue DW_AT_vtable_elem_location; + DIEValue DW_AT_type; // Insert any additional ones here... }; @@ -135,13 +128,13 @@ private: /// \brief Hashes the data in a block like DIEValue, e.g. DW_FORM_block or /// DW_FORM_exprloc. - void hashBlockData(const SmallVectorImpl<DIEValue *> &Values); + void hashBlockData(const DIE::value_range &Values); /// \brief Hashes the contents pointed to in the .debug_loc section. void hashLocList(const DIELocList &LocList); /// \brief Hashes an individual attribute. - void hashAttribute(AttrEntry Attr, dwarf::Tag Tag); + void hashAttribute(DIEValue Value, dwarf::Tag Tag); /// \brief Hashes an attribute that refers to another DIE. void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 58b406b788fb..f8cdde203187 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -192,9 +192,9 @@ void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) { PrevHash = HashValue; Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i)); MCContext &Context = Asm->OutStreamer->getContext(); - const MCExpr *Sub = MCBinaryExpr::CreateSub( - MCSymbolRefExpr::Create((*HI)->Sym, Context), - MCSymbolRefExpr::Create(SecBegin, Context), Context); + const MCExpr *Sub = MCBinaryExpr::createSub( + MCSymbolRefExpr::create((*HI)->Sym, Context), + MCSymbolRefExpr::create(SecBegin, Context), Context); Asm->OutStreamer->EmitValue(Sub, sizeof(uint32_t)); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index c10e70352af0..689184a651ed 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -42,8 +42,7 @@ void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, DD->addArangeLabel(SymbolCU(this, Label)); unsigned idx = DD->getAddressPool().getIndex(Label); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); + Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, DIEInteger(idx)); } void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, @@ -52,9 +51,10 @@ void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, if (Label) DD->addArangeLabel(SymbolCU(this, Label)); - Die.addValue(Attribute, dwarf::DW_FORM_addr, - Label ? 
(DIEValue *)new (DIEValueAllocator) DIELabel(Label) - : new (DIEValueAllocator) DIEInteger(0)); + if (Label) + Die.addValue(Attribute, dwarf::DW_FORM_addr, DIELabel(Label)); + else + Die.addValue(Attribute, dwarf::DW_FORM_addr, DIEInteger(0)); } unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, @@ -145,7 +145,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( bool addToAccelTable = false; if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) { addToAccelTable = true; - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc; const MCSymbol *Sym = Asm->getSymbol(Global); if (Global->isThreadLocal()) { // FIXME: Make this work with -gsplit-dwarf. @@ -183,7 +183,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) { addToAccelTable = true; // GV is a merged global. - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc; Value *Ptr = CE->getOperand(0); MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr)); DD->addArangeLabel(SymbolCU(this, Sym)); @@ -242,7 +242,7 @@ void DwarfCompileUnit::initStmtList() { MCSymbol *LineTableStartSym = Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID()); - stmtListIndex = UnitDie.getValues().size(); + stmtListIndex = std::distance(UnitDie.values_begin(), UnitDie.values_end()); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. For split dwarf this is @@ -255,9 +255,7 @@ void DwarfCompileUnit::initStmtList() { } void DwarfCompileUnit::applyStmtList(DIE &D) { - D.addValue(dwarf::DW_AT_stmt_list, - UnitDie.getAbbrev().getData()[stmtListIndex].getForm(), - UnitDie.getValues()[stmtListIndex]); + D.addValue(UnitDie.values_begin()[stmtListIndex]); } void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin, @@ -365,10 +363,9 @@ void DwarfCompileUnit::constructScopeDIE( void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo) { - DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset : dwarf::DW_FORM_data4, - Value); + new (DIEValueAllocator) DIEDelta(Hi, Lo)); } void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, @@ -515,7 +512,7 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, return VariableDie; auto Expr = DV.getExpression().begin(); - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); for (auto FI : DV.getFrameIndex()) { unsigned FrameReg = 0; @@ -739,7 +736,7 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, /// Add an address attribute to a die based on the location provided. 
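// Across these DwarfCompileUnit hunks, attributes with small payloads
// (DIEInteger, DIELabel, DIELocList) are now handed to addValue by value,
// while the larger node types (DIEDelta, DIELoc, DIEBlock) still go through
// the allocator. Hedged sketch of the by-value form; addLineAttr and the
// attribute choice are hypothetical:
#include "llvm/CodeGen/DIE.h"
using namespace llvm;
static void addLineAttr(DIE &Die, uint64_t Line) {
  Die.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_udata,
               DIEInteger(Line));
}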
void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location) { - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc; bool validReg; if (Location.isReg()) @@ -761,7 +758,7 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location) { - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); assert(DV.getExpression().size() == 1); const DIExpression *Expr = DV.getExpression().back(); @@ -782,10 +779,9 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, /// Add a Dwarf loclistptr attribute data and value. void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index) { - DIEValue *Value = new (DIEValueAllocator) DIELocList(Index); dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset : dwarf::DW_FORM_data4; - Die.addValue(Attribute, Form, Value); + Die.addValue(Attribute, Form, DIELocList(Index)); } void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var, @@ -802,8 +798,7 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var, /// Add a Dwarf expression attribute data and value. void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr) { - DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); - Die.addValue((dwarf::Attribute)0, Form, Value); + Die.addValue((dwarf::Attribute)0, Form, DIEExpr(Expr)); } void DwarfCompileUnit::applySubprogramAttributesToDefinition( diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 105ff6c198f8..3f6665bd5768 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1340,9 +1340,8 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, // We could have a specification DIE that has our most of our knowledge, // look for that now. - DIEValue *SpecVal = Die->findAttribute(dwarf::DW_AT_specification); - if (SpecVal) { - DIE &SpecDIE = cast<DIEEntry>(SpecVal)->getEntry(); + if (DIEValue SpecVal = Die->findAttribute(dwarf::DW_AT_specification)) { + DIE &SpecDIE = SpecVal.getDIEEntry().getEntry(); if (SpecDIE.findAttribute(dwarf::DW_AT_external)) Linkage = dwarf::GIEL_EXTERNAL; } else if (Die->findAttribute(dwarf::DW_AT_external)) @@ -1563,6 +1562,8 @@ void DwarfDebug::emitDebugLoc() { Asm->OutStreamer->EmitLabel(List.Label); const DwarfCompileUnit *CU = List.CU; for (const auto &Entry : DebugLocs.getEntries(List)) { + if (Entry.BeginSym == Entry.EndSym) + continue; // Set up the range. This range is relative to the entry point of the // compile unit. This is a hard coded 0 for low_pc when we're emitting // ranges, or the DW_AT_low_pc on the compile unit otherwise. 
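The emitDebugLoc guard added above drops entries whose begin and end labels coincide: the half-open range they bound covers zero bytes, so the entry describes nothing and would only bloat .debug_loc. A minimal illustration, with Entry assumed to be one debug-loc list entry:

if (Entry.BeginSym == Entry.EndSym)
  continue; // [Begin, End) is empty; emit nothing for it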
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index a2799b8d6300..d56982712d53 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -65,6 +65,11 @@ void DwarfExpression::AddShr(unsigned ShiftBy) { EmitOp(dwarf::DW_OP_shr); } +void DwarfExpression::AddOpStackValue() { + if (DwarfVersion >= 4) + EmitOp(dwarf::DW_OP_stack_value); +} + bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) { if (isFrameRegister(MachineReg)) { // If variable offset is based in frame register then use fbreg. @@ -172,16 +177,14 @@ void DwarfExpression::AddSignedConstant(int Value) { // value, so the producers and consumers started to rely on heuristics // to disambiguate the value vs. location status of the expression. // See PR21176 for more details. - if (DwarfVersion >= 4) - EmitOp(dwarf::DW_OP_stack_value); + AddOpStackValue(); } void DwarfExpression::AddUnsignedConstant(unsigned Value) { EmitOp(dwarf::DW_OP_constu); EmitUnsigned(Value); // cf. comment in DwarfExpression::AddSignedConstant(). - if (DwarfVersion >= 4) - EmitOp(dwarf::DW_OP_stack_value); + AddOpStackValue(); } static unsigned getOffsetOrZero(unsigned OffsetInBits, @@ -212,15 +215,30 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); } case dwarf::DW_OP_plus: { - // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset]. auto N = I.getNext(); + unsigned Offset = I->getArg(0); + // First combine all DW_OP_plus until we hit either a DW_OP_deref or a + // DW_OP_bit_piece + while (N != E && N->getOp() == dwarf::DW_OP_plus) { + Offset += N->getArg(0); + ++I; + N = I.getNext(); + } if (N != E && N->getOp() == dwarf::DW_OP_deref) { - unsigned Offset = I->getArg(0); + // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset]. ValidReg = AddMachineRegIndirect(MachineReg, Offset); std::advance(I, 2); - break; - } else - ValidReg = AddMachineRegPiece(MachineReg); + } else { + assert ((N == E) || (N->getOp() == dwarf::DW_OP_bit_piece)); + if (Offset == 0) { + ValidReg = AddMachineRegPiece(MachineReg); + } else { + ValidReg = AddMachineRegIndirect(MachineReg, Offset); + AddOpStackValue(); + } + ++I; + } + break; } case dwarf::DW_OP_deref: { // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. @@ -237,6 +255,7 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr, // Emit remaining elements of the expression. AddExpression(I, E, PieceOffsetInBits); + return true; } diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index 78ec937a6b60..f6249fff4253 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -83,6 +83,9 @@ public: bool AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits = 0, unsigned PieceOffsetInBits = 0); + /// Emit a DW_OP_stack_value + void AddOpStackValue(); + /// Emit a signed constant. void AddSignedConstant(int Value); /// Emit an unsigned constant. 
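The DwarfExpression changes above fold runs of DW_OP_plus before choosing a lowering, and route DW_OP_stack_value through the new AddOpStackValue helper, which emits nothing before DWARF 4. A worked trace under an assumed input:

// Variable in register R with expression [DW_OP_plus 4, DW_OP_plus 8,
// DW_OP_deref]: the offsets accumulate to 12, and the trailing deref turns
// the prefix into a single indirect location, DW_OP_breg<R> +12.
// Without the deref, [DW_OP_plus 12] now lowers to a value description:
// DW_OP_breg<R> +12, DW_OP_stack_value (the latter only on DWARF >= 4).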
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 10b58d4e86ae..5ef333c4cf44 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -20,25 +20,34 @@ namespace llvm { DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) : Asm(AP), StrPool(DA, *Asm, Pref) {} -DwarfFile::~DwarfFile() {} +DwarfFile::~DwarfFile() { + for (DIEAbbrev *Abbrev : Abbreviations) + Abbrev->~DIEAbbrev(); +} // Define a unique number for the abbreviation. // -void DwarfFile::assignAbbrevNumber(DIEAbbrev &Abbrev) { - // Check the set for priors. - DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev); - - // If it's newly added. - if (InSet == &Abbrev) { - // Add to abbreviation list. - Abbreviations.push_back(&Abbrev); - - // Assign the vector position + 1 as its number. - Abbrev.setNumber(Abbreviations.size()); - } else { - // Assign existing abbreviation number. - Abbrev.setNumber(InSet->getNumber()); +DIEAbbrev &DwarfFile::assignAbbrevNumber(DIE &Die) { + FoldingSetNodeID ID; + DIEAbbrev Abbrev = Die.generateAbbrev(); + Abbrev.Profile(ID); + + void *InsertPos; + if (DIEAbbrev *Existing = + AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) { + Die.setAbbrevNumber(Existing->getNumber()); + return *Existing; } + + // Move the abbreviation to the heap and assign a number. + DIEAbbrev *New = new (AbbrevAllocator) DIEAbbrev(std::move(Abbrev)); + Abbreviations.push_back(New); + New->setNumber(Abbreviations.size()); + Die.setAbbrevNumber(Abbreviations.size()); + + // Store it for lookup. + AbbreviationsSet.InsertNode(New, InsertPos); + return *New; } void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) { @@ -83,10 +92,7 @@ void DwarfFile::computeSizeAndOffsets() { // CU. It returns the offset after laying out the DIE. unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { // Record the abbreviation. - assignAbbrevNumber(Die.getAbbrev()); - - // Get the abbreviation for this DIE. - const DIEAbbrev &Abbrev = Die.getAbbrev(); + const DIEAbbrev &Abbrev = assignAbbrevNumber(Die); // Set DIE offset Die.setOffset(Offset); @@ -94,22 +100,17 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { // Start the size with the size of abbreviation code. Offset += getULEB128Size(Die.getAbbrevNumber()); - const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); - // Size the DIE attribute values. - for (unsigned i = 0, N = Values.size(); i < N; ++i) + for (const auto &V : Die.values()) // Size attribute value. - Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm()); - - // Get the children. - const auto &Children = Die.getChildren(); + Offset += V.SizeOf(Asm, V.getForm()); // Size the DIE children if any. - if (!Children.empty()) { + if (Die.hasChildren()) { + (void)Abbrev; assert(Abbrev.hasChildren() && "Children flag not set"); - for (auto &Child : Children) + for (auto &Child : Die.children()) Offset = computeSizeAndOffset(*Child, Offset); // End of children marker. diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index 532ed96c2689..8402027edd6f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -37,6 +37,8 @@ class DwarfFile { // Target of Dwarf emission, used for sizing of abbreviations. AsmPrinter *Asm; + BumpPtrAllocator AbbrevAllocator; + // Used to uniquely define abbreviations. 
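// assignAbbrevNumber, rewritten above, computes the abbreviation from the
// DIE on demand (DIE::generateAbbrev) and interns it through the FoldingSet
// declared just below, allocating only on a miss. Generic probe-then-insert
// sketch; T is assumed to derive from FoldingSetNode and to define Profile:
#include "llvm/ADT/FoldingSet.h"
#include "llvm/Support/Allocator.h"
using namespace llvm;
template <class T>
static T &uniqueInto(FoldingSet<T> &Set, BumpPtrAllocator &Alloc, T Tmp) {
  FoldingSetNodeID ID;
  Tmp.Profile(ID);
  void *InsertPos;
  if (T *Existing = Set.FindNodeOrInsertPos(ID, InsertPos))
    return *Existing;                     // hit: reuse the interned node
  T *New = new (Alloc) T(std::move(Tmp)); // miss: move to stable storage
  Set.InsertNode(New, InsertPos);
  return *New;
}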
FoldingSet<DIEAbbrev> AbbreviationsSet; @@ -72,8 +74,11 @@ public: /// \brief Compute the size and offset of all the DIEs. void computeSizeAndOffsets(); - /// \brief Define a unique number for the abbreviation. - void assignAbbrevNumber(DIEAbbrev &Abbrev); + /// Define a unique number for the abbreviation. + /// + /// Compute the abbreviation for \c Die, look up its unique number, and + /// return a reference to it in the uniquing table. + DIEAbbrev &assignAbbrevNumber(DIE &Die); /// \brief Add a unit to the list of CUs. void addUnit(std::unique_ptr<DwarfUnit> U); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 04836c614044..907f6706bc6a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -70,7 +70,6 @@ DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) { assert(UnitTag == dwarf::DW_TAG_compile_unit || UnitTag == dwarf::DW_TAG_type_unit); - DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); } DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, @@ -89,11 +88,6 @@ DwarfUnit::~DwarfUnit() { DIELocs[j]->~DIELoc(); } -DIEEntry *DwarfUnit::createDIEEntry(DIE &Entry) { - DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); - return Value; -} - int64_t DwarfUnit::getDefaultLowerBound() const { switch (getLanguage()) { default: @@ -190,18 +184,16 @@ void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) { void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { if (DD->getDwarfVersion() >= 4) - Die.addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne); + Die.addValue(Attribute, dwarf::DW_FORM_flag_present, DIEInteger(1)); else - Die.addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne); + Die.addValue(Attribute, dwarf::DW_FORM_flag, DIEInteger(1)); } void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, uint64_t Integer) { if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator) - DIEInteger(Integer); - Die.addValue(Attribute, *Form, Value); + Die.addValue(Attribute, *Form, DIEInteger(Integer)); } void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) { @@ -212,8 +204,7 @@ void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, int64_t Integer) { if (!Form) Form = DIEInteger::BestForm(true, Integer); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); - Die.addValue(Attribute, *Form, Value); + Die.addValue(Attribute, *Form, DIEInteger(Integer)); } void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form, @@ -225,14 +216,12 @@ void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute, StringRef String) { Die.addValue(Attribute, isDwoUnit() ? 
dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp, - new (DIEValueAllocator) DIEString(DU->getStringPool().getEntry(*Asm, String))); } void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Label) { - DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); - Die.addValue(Attribute, Form, Value); + Die.addValue(Attribute, Form, DIELabel(Label)); } void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) { @@ -265,12 +254,12 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo) { - DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die.addValue(Attribute, dwarf::DW_FORM_data4, Value); + Die.addValue(Attribute, dwarf::DW_FORM_data4, + new (DIEValueAllocator) DIEDelta(Hi, Lo)); } void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) { - addDIEEntry(Die, Attribute, createDIEEntry(Entry)); + addDIEEntry(Die, Attribute, DIEEntry(Entry)); } void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) { @@ -281,13 +270,13 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) { addFlag(Die, dwarf::DW_AT_declaration); Die.addValue(dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8, - new (DIEValueAllocator) DIETypeSignature(Type)); + DIETypeSignature(Type)); } void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, - DIEEntry *Entry) { + DIEEntry Entry) { const DIE *DieCU = Die.getUnitOrNull(); - const DIE *EntryCU = Entry->getEntry().getUnitOrNull(); + const DIE *EntryCU = Entry.getEntry().getUnitOrNull(); if (!DieCU) // We assume that Die belongs to this CU, if it is not linked to any CU yet. DieCU = &getUnitDie(); @@ -301,8 +290,7 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) { assert(Tag != dwarf::DW_TAG_auto_variable && Tag != dwarf::DW_TAG_arg_variable); - Parent.addChild(make_unique<DIE>((dwarf::Tag)Tag)); - DIE &Die = *Parent.getChildren().back(); + DIE &Die = Parent.addChild(make_unique<DIE>((dwarf::Tag)Tag)); if (N) insertDIE(N, &Die); return Die; @@ -471,7 +459,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // Decode the original location, and use that as the start of the byref // variable's location. - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc; bool validReg; if (Location.isReg()) @@ -588,7 +576,7 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, const DIDerivedType *Ty) { void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) { assert(MO.isFPImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock; APFloat FPImm = MO.getFPImm()->getValueAPF(); // Get the raw data form of the floating point. @@ -644,7 +632,7 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { return; } - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock; // Get the raw data form of the large APInt. const uint64_t *Ptr64 = Val.getRawData(); @@ -777,22 +765,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context, void DwarfUnit::addType(DIE &Entity, const DIType *Ty, dwarf::Attribute Attribute) { assert(Ty && "Trying to add a type that doesn't exist?"); - - // Check for pre-existence. 
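// Two of the changes above combine in the surrounding addType hunk, which
// collapses to a single addDIEEntry call: DIE::addChild now returns a
// reference to the adopted child, and addDIEEntry takes its DIEEntry by
// value, so no proxy object is allocated or cached. Sketch of chained
// creation; addMember and its attribute choice are hypothetical:
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/DIE.h"
using namespace llvm;
static DIE &addMember(DIE &Parent) {
  DIE &Child = Parent.addChild(make_unique<DIE>(dwarf::DW_TAG_member));
  Child.addValue(dwarf::DW_AT_artificial, dwarf::DW_FORM_flag_present,
                 DIEInteger(1));
  return Child;
}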
- DIEEntry *Entry = getDIEEntry(Ty); - // If it exists then use the existing value. - if (Entry) { - addDIEEntry(Entity, Attribute, Entry); - return; - } - - // Construct type. - DIE *Buffer = getOrCreateTypeDIE(Ty); - - // Set up proxy. - Entry = createDIEEntry(*Buffer); - insertDIEEntry(Ty, Entry); - addDIEEntry(Entity, Attribute, Entry); + addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty))); } std::string DwarfUnit::getParentContextString(const DIScope *Context) const { @@ -969,12 +942,6 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (unsigned PropertyAttributes = Property->getAttributes()) addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, PropertyAttributes); - - DIEEntry *Entry = getDIEEntry(Element); - if (!Entry) { - Entry = createDIEEntry(ElemDie); - insertDIEEntry(Element, Entry); - } } } @@ -1061,7 +1028,7 @@ void DwarfUnit::constructTemplateValueParameterDIE( else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) { // For declaration non-type template parameters (such as global values and // functions) - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc; addOpAddress(*Loc, Asm->getSymbol(GV)); // Emit DW_OP_stack_value to use the address as the immediate value of the // parameter, rather than a pointer to it. @@ -1354,7 +1321,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { // expression to extract appropriate offset from vtable. // BaseAddr = ObAddr + *((*ObAddr) - Offset) - DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc(); + DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc; addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); @@ -1393,7 +1360,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { OffsetInBytes = DT->getOffsetInBits() >> 3; if (DD->getDwarfVersion() <= 2) { - DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc(); + DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc; addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); @@ -1417,10 +1384,10 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { dwarf::DW_VIRTUALITY_virtual); // Objective-C properties. - if (MDNode *PNode = DT->getObjCProperty()) - if (DIEEntry *PropertyDie = getDIEEntry(PNode)) + if (DINode *PNode = DT->getObjCProperty()) + if (DIE *PDie = getDIE(PNode)) MemberDie.addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, - PropertyDie); + DIEEntry(*PDie)); if (DT->isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 0d01a9e9fb38..f56c9b4eb13e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -93,10 +93,6 @@ protected: /// information entries. DenseMap<const MDNode *, DIE *> MDNodeToDieMap; - /// Tracks the mapping of unit level debug information descriptors to debug - /// information entries using a DIEEntry proxy. - DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; - /// A list of all the DIEBlocks in use. 
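The hunks above and below retire the per-MDNode DIEEntry proxy map in favor of constructing DIE values directly, either by value or by placement new into the unit's BumpPtrAllocator. A minimal sketch of that allocation pattern, with a hypothetical Node type standing in for DIELoc/DIEBlock/DIEDelta:

#include "llvm/Support/Allocator.h"

// BumpPtrAllocator has an operator new overload, so objects are carved out
// of slab memory and never individually deleted; everything is reclaimed
// when the allocator itself is destroyed.
struct Node {
  int Value;
  explicit Node(int V) : Value(V) {}
};

int demo() {
  llvm::BumpPtrAllocator Alloc;    // plays the role of DIEValueAllocator
  Node *N = new (Alloc) Node(42);  // placement new; no matching delete
  return N->Value;                 // memory lives as long as Alloc does
}

This is also why the `DIELoc()` spellings become `DIELoc` in these hunks: for classes with a user-provided default constructor the two forms are equivalent, so dropping the parentheses is purely stylistic.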
std::vector<DIEBlock *> DIEBlocks; @@ -111,9 +107,6 @@ protected: // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - // A preallocated DIEValue because 1 is used frequently. - DIEInteger *DIEIntegerOne; - /// The section this unit will be emitted in. MCSection *Section; @@ -150,7 +143,7 @@ public: void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } /// \brief Return true if this compile unit has something to write out. - bool hasContent() const { return !UnitDie.getChildren().empty(); } + bool hasContent() const { return UnitDie.hasChildren(); } /// \brief Get string containing language specific context for a global name. /// @@ -180,7 +173,7 @@ public: DIE *getDIE(const DINode *D) const; /// \brief Returns a fresh newly allocated DIELoc. - DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc(); } + DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc; } /// \brief Insert DIE into the map. /// @@ -233,7 +226,7 @@ public: void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry); /// \brief Add a DIE attribute data and value. - void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry *Entry); + void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry); void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type); @@ -369,26 +362,12 @@ private: /// If the DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; - /// \brief Returns the DIE entry for the specified debug variable. - DIEEntry *getDIEEntry(const MDNode *N) const { - return MDNodeToDIEEntryMap.lookup(N); - } - - /// \brief Insert debug information entry into the map. - void insertDIEEntry(const MDNode *N, DIEEntry *E) { - MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); - } - /// \brief Get an anonymous type for index type. DIE *getIndexTyDie(); /// \brief Set D as anonymous type for index which can be reused later. void setIndexTyDie(DIE *D) { IndexTyDie = D; } - /// \brief Creates a new DIEEntry to be a proxy for a debug information - /// entry. - DIEEntry *createDIEEntry(DIE &Entry); - /// If this is a named finished type then include it in the list of types for /// the accelerator tables. 
void updateAcceleratorTables(const DIScope *Context, const DIType *Ty, diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index 371e20a3e5a0..535b1f605853 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -171,10 +171,10 @@ static void EmitLabelDiff(MCStreamer &Streamer, unsigned int Size = 4) { MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; MCContext &Context = Streamer.getContext(); - const MCExpr *FromRef = MCSymbolRefExpr::Create(From, Variant, Context), - *ToRef = MCSymbolRefExpr::Create(To, Variant, Context); + const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context), + *ToRef = MCSymbolRefExpr::create(To, Variant, Context); const MCExpr *AddrDelta = - MCBinaryExpr::Create(MCBinaryExpr::Sub, ToRef, FromRef, Context); + MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context); Streamer.EmitValue(AddrDelta, Size); } diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index dc6df9cb0144..f1663503c08e 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -1,4 +1,4 @@ -//===-- CodeGen/AsmPrinter/Win64Exception.cpp - Dwarf Exception Impl ------===// +//===-- CodeGen/AsmPrinter/WinException.cpp - Dwarf Exception Impl ------===// // // The LLVM Compiler Infrastructure // @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "Win64Exception.h" +#include "WinException.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" @@ -29,6 +29,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCWin64EH.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -38,28 +39,33 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -Win64Exception::Win64Exception(AsmPrinter *A) - : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false), - shouldEmitMoves(false) {} +WinException::WinException(AsmPrinter *A) : EHStreamer(A) { + // MSVC's EH tables are always composed of 32-bit words. All known 64-bit + // platforms use an imagerel32 relocation to refer to symbols. + useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64); +} -Win64Exception::~Win64Exception() {} +WinException::~WinException() {} /// endModule - Emit all exception information that should come after the /// content. -void Win64Exception::endModule() { +void WinException::endModule() { } -void Win64Exception::beginFunction(const MachineFunction *MF) { +void WinException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. 
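The EmitLabelDiff hunk above is part of the mechanical Create-to-create renaming of the MCExpr factory functions, but the idiom it uses is worth spelling out: the byte distance between two labels is emitted as the symbolic expression To - From, which the assembler resolves once layout is final. A minimal sketch, assuming a live streamer and two already-defined symbols From and To:

// Emit the 32-bit distance between two labels; createSub is shorthand for
// MCBinaryExpr::create(MCBinaryExpr::Sub, ...).
MCContext &Ctx = Streamer.getContext();
const MCExpr *Delta = MCBinaryExpr::createSub(
    MCSymbolRefExpr::create(To, Ctx), MCSymbolRefExpr::create(From, Ctx), Ctx);
Streamer.EmitValue(Delta, /*Size=*/4);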
bool hasLandingPads = !MMI->getLandingPads().empty(); + const Function *F = MF->getFunction(); + const Function *ParentF = MMI->getWinEHParent(F); + shouldEmitMoves = Asm->needsSEHMoves(); const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = MF->getMMI().getPersonality(); + const Function *Per = MMI->getPersonality(); shouldEmitPersonality = hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit && Per; @@ -68,12 +74,17 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; + // If we're not using CFI, we don't want the CFI or the personality. Emit the + // LSDA if this is the parent function. + if (!Asm->MAI->usesWindowsCFI()) { + shouldEmitLSDA = (hasLandingPads && F == ParentF); + shouldEmitPersonality = false; + return; + } // If this was an outlined handler, we need to define the label corresponding // to the offset of the parent frame relative to the stack pointer after the // prologue. - const Function *F = MF->getFunction(); - const Function *ParentF = MMI->getWinEHParent(F); if (F != ParentF) { WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F); @@ -85,27 +96,24 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { // Emit a symbol assignment. Asm->OutStreamer->EmitAssignment( HandlerTypeParentFrameOffset, - MCConstantExpr::Create(I->second, Asm->OutContext)); + MCConstantExpr::create(I->second, Asm->OutContext)); } } - if (!shouldEmitPersonality && !shouldEmitMoves) - return; + if (shouldEmitMoves || shouldEmitPersonality) + Asm->OutStreamer->EmitWinCFIStartProc(Asm->CurrentFnSym); - Asm->OutStreamer->EmitWinCFIStartProc(Asm->CurrentFnSym); - - if (!shouldEmitPersonality) - return; - - const MCSymbol *PersHandlerSym = - TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); - Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); + if (shouldEmitPersonality) { + const MCSymbol *PersHandlerSym = + TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); + Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); + } } /// endFunction - Gather and emit post-function exception information. /// -void Win64Exception::endFunction(const MachineFunction *MF) { - if (!shouldEmitPersonality && !shouldEmitMoves) +void WinException::endFunction(const MachineFunction *MF) { + if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA) return; EHPersonality Per = MMI->getPersonalityType(); @@ -116,16 +124,27 @@ void Win64Exception::endFunction(const MachineFunction *MF) { if (!isMSVCEHPersonality(Per)) MMI->TidyLandingPads(); - if (shouldEmitPersonality) { + if (shouldEmitPersonality || shouldEmitLSDA) { Asm->OutStreamer->PushSection(); - // Emit an UNWIND_INFO struct describing the prologue. - Asm->OutStreamer->EmitWinEHHandlerData(); + if (shouldEmitMoves || shouldEmitPersonality) { + // Emit an UNWIND_INFO struct describing the prologue. + Asm->OutStreamer->EmitWinEHHandlerData(); + } else { + // Just switch sections to the right xdata section. This use of + // CurrentFnSym assumes that we only emit the LSDA when ending the parent + // function. + MCSection *XData = WinEH::UnwindEmitter::getXDataSection( + Asm->CurrentFnSym, Asm->OutContext); + Asm->OutStreamer->SwitchSection(XData); + } // Emit the tables appropriate to the personality function in use. 
If we // don't recognize the personality, assume it uses an Itanium-style LSDA. if (Per == EHPersonality::MSVC_Win64SEH) emitCSpecificHandlerTable(); + else if (Per == EHPersonality::MSVC_X86SEH) + emitCSpecificHandlerTable(); // FIXME else if (Per == EHPersonality::MSVC_CXX) emitCXXFrameHandler3Table(MF); else @@ -133,20 +152,24 @@ void Win64Exception::endFunction(const MachineFunction *MF) { Asm->OutStreamer->PopSection(); } - Asm->OutStreamer->EmitWinCFIEndProc(); + + if (shouldEmitMoves) + Asm->OutStreamer->EmitWinCFIEndProc(); } -const MCExpr *Win64Exception::createImageRel32(const MCSymbol *Value) { +const MCExpr *WinException::create32bitRef(const MCSymbol *Value) { if (!Value) - return MCConstantExpr::Create(0, Asm->OutContext); - return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32, + return MCConstantExpr::create(0, Asm->OutContext); + return MCSymbolRefExpr::create(Value, useImageRel32 + ? MCSymbolRefExpr::VK_COFF_IMGREL32 + : MCSymbolRefExpr::VK_None, Asm->OutContext); } -const MCExpr *Win64Exception::createImageRel32(const GlobalValue *GV) { +const MCExpr *WinException::create32bitRef(const GlobalValue *GV) { if (!GV) - return MCConstantExpr::Create(0, Asm->OutContext); - return createImageRel32(Asm->getSymbol(GV)); + return MCConstantExpr::create(0, Asm->OutContext); + return create32bitRef(Asm->getSymbol(GV)); } /// Emit the language-specific data that __C_specific_handler expects. This @@ -177,7 +200,7 @@ const MCExpr *Win64Exception::createImageRel32(const GlobalValue *GV) { /// imagerel32 LabelLPad; // Zero means __finally. /// } Entries[NumEntries]; /// }; -void Win64Exception::emitCSpecificHandlerTable() { +void WinException::emitCSpecificHandlerTable() { const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); // Simplifying assumptions for first implementation: @@ -227,16 +250,16 @@ void Win64Exception::emitCSpecificHandlerTable() { // Compute the label range. We may reuse the function begin and end labels // rather than forming new ones. const MCExpr *Begin = - createImageRel32(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym); + create32bitRef(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym); const MCExpr *End; if (CSE.EndLabel) { // The interval is half-open, so we have to add one to include the return // address of the last invoke in the range. - End = MCBinaryExpr::CreateAdd(createImageRel32(CSE.EndLabel), - MCConstantExpr::Create(1, Asm->OutContext), + End = MCBinaryExpr::createAdd(create32bitRef(CSE.EndLabel), + MCConstantExpr::create(1, Asm->OutContext), Asm->OutContext); } else { - End = createImageRel32(EHFuncEndSym); + End = create32bitRef(EHFuncEndSym); } // Emit an entry for each action. @@ -248,7 +271,7 @@ void Win64Exception::emitCSpecificHandlerTable() { // emit '1' to indicate a catch-all. 
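The doc comment above gives the handler table in pseudo-C; written out as a real struct, the bytes emitted by emitCSpecificHandlerTable have roughly this shape (a sketch using the patch's own field names, with imagerel32 rendered as a 32-bit image-relative offset):

#include <stdint.h>

struct SEHTableEntry {
  uint32_t LabelStart;       // start of the guarded range
  uint32_t LabelEnd;         // end label + 1, since the range is half-open
  uint32_t FilterOrFinally;  // filter function; the literal 1 means catch-all
  uint32_t LabelLPad;        // landing pad; zero means __finally
};

struct SEHTable {
  uint32_t NumEntries;
  struct SEHTableEntry Entries[1];  // NumEntries entries in the image
};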
const Function *F = Handler.FilterOrFinally; if (F) - Asm->OutStreamer->EmitValue(createImageRel32(Asm->getSymbol(F)), 4); + Asm->OutStreamer->EmitValue(create32bitRef(Asm->getSymbol(F)), 4); else Asm->OutStreamer->EmitIntValue(1, 4); @@ -257,14 +280,14 @@ void Win64Exception::emitCSpecificHandlerTable() { const BlockAddress *BA = Handler.RecoverBA; if (BA) Asm->OutStreamer->EmitValue( - createImageRel32(Asm->GetBlockAddressSymbol(BA)), 4); + create32bitRef(Asm->GetBlockAddressSymbol(BA)), 4); else Asm->OutStreamer->EmitIntValue(0, 4); } } } -void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) { +void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { const Function *F = MF->getFunction(); const Function *ParentF = MMI->getWinEHParent(F); auto &OS = *Asm->OutStreamer; @@ -273,91 +296,26 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) { StringRef ParentLinkageName = GlobalValue::getRealLinkageName(ParentF->getName()); - MCSymbol *FuncInfoXData = - Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", ParentLinkageName)); - OS.EmitValue(createImageRel32(FuncInfoXData), 4); - - // The Itanium LSDA table sorts similar landing pads together to simplify the - // actions table, but we don't need that. - SmallVector<const LandingPadInfo *, 64> LandingPads; - const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); - LandingPads.reserve(PadInfos.size()); - for (const auto &LP : PadInfos) - LandingPads.push_back(&LP); - - RangeMapType PadMap; - computePadMap(LandingPads, PadMap); - - // The end label of the previous invoke or nounwind try-range. - MCSymbol *LastLabel = Asm->getFunctionBegin(); - - // Whether there is a potentially throwing instruction (currently this means - // an ordinary call) between the end of the previous try-range and now. - bool SawPotentiallyThrowing = false; - - int LastEHState = -2; - - // The parent function and the catch handlers contribute to the 'ip2state' - // table. - - // Include ip2state entries for the beginning of the main function and - // for catch handler functions. - if (F == ParentF) { - FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); - LastEHState = -1; - } else if (FuncInfo.HandlerBaseState.count(F)) { - FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, - FuncInfo.HandlerBaseState[F])); - LastEHState = FuncInfo.HandlerBaseState[F]; - } - for (const auto &MBB : *MF) { - for (const auto &MI : MBB) { - if (!MI.isEHLabel()) { - if (MI.isCall()) - SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); - continue; - } - - // End of the previous try-range? - MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol(); - if (BeginLabel == LastLabel) - SawPotentiallyThrowing = false; - - // Beginning of a new try-range? - RangeMapType::const_iterator L = PadMap.find(BeginLabel); - if (L == PadMap.end()) - // Nope, it was just some random label. - continue; - - const PadRange &P = L->second; - const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; - assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && - "Inconsistent landing pad map!"); - - // FIXME: Should this be using FuncInfo.HandlerBaseState? 
- if (SawPotentiallyThrowing && LastEHState != -1) { - FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); - SawPotentiallyThrowing = false; - LastEHState = -1; - } - - if (LandingPad->WinEHState != LastEHState) - FuncInfo.IPToStateList.push_back( - std::make_pair(BeginLabel, LandingPad->WinEHState)); - LastEHState = LandingPad->WinEHState; - LastLabel = LandingPad->EndLabels[P.RangeIndex]; - } + MCSymbol *FuncInfoXData = nullptr; + if (shouldEmitPersonality) { + FuncInfoXData = Asm->OutContext.getOrCreateSymbol( + Twine("$cppxdata$", ParentLinkageName)); + OS.EmitValue(create32bitRef(FuncInfoXData), 4); + + extendIP2StateTable(MF, ParentF, FuncInfo); + + // Defer emission until we've visited the parent function and all the catch + // handlers. Cleanups don't contribute to the ip2state table, so don't count + // them. + if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F)) + return; + ++FuncInfo.NumIPToStateFuncsVisited; + if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size()) + return; + } else { + FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(ParentLinkageName); } - // Defer emission until we've visited the parent function and all the catch - // handlers. Cleanups don't contribute to the ip2state table yet, so don't - // count them. - if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F)) - return; - ++FuncInfo.NumIPToStateFuncsVisited; - if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size()) - return; - MCSymbol *UnwindMapXData = nullptr; MCSymbol *TryBlockMapXData = nullptr; MCSymbol *IPToStateXData = nullptr; @@ -377,9 +335,9 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) { // UnwindMapEntry *UnwindMap; // uint32_t NumTryBlocks; // TryBlockMapEntry *TryBlockMap; - // uint32_t IPMapEntries; - // IPToStateMapEntry *IPToStateMap; - // uint32_t UnwindHelp; // (x64/ARM only) + // uint32_t IPMapEntries; // always 0 for x86 + // IPToStateMapEntry *IPToStateMap; // always 0 for x86 + // uint32_t UnwindHelp; // non-x86 only // ESTypeList *ESTypeList; // int32_t EHFlags; // } @@ -389,12 +347,13 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) { OS.EmitLabel(FuncInfoXData); OS.EmitIntValue(0x19930522, 4); // MagicNumber OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState - OS.EmitValue(createImageRel32(UnwindMapXData), 4); // UnwindMap + OS.EmitValue(create32bitRef(UnwindMapXData), 4); // UnwindMap OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks - OS.EmitValue(createImageRel32(TryBlockMapXData), 4); // TryBlockMap + OS.EmitValue(create32bitRef(TryBlockMapXData), 4); // TryBlockMap OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries - OS.EmitValue(createImageRel32(IPToStateXData), 4); // IPToStateMap - OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp + OS.EmitValue(create32bitRef(IPToStateXData), 4); // IPToStateMap + if (Asm->MAI->usesWindowsCFI()) + OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp OS.EmitIntValue(0, 4); // ESTypeList OS.EmitIntValue(1, 4); // EHFlags @@ -406,7 +365,7 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) { OS.EmitLabel(UnwindMapXData); for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) { OS.EmitIntValue(UME.ToState, 4); // ToState - OS.EmitValue(createImageRel32(UME.Cleanup), 4); // Action + OS.EmitValue(create32bitRef(UME.Cleanup), 4); // Action } } @@ -443,7 +402,7 @@ void Win64Exception::emitCXXFrameHandler3Table(const 
MachineFunction *MF) { OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh OS.EmitIntValue(CatchHigh, 4); // CatchHigh OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches - OS.EmitValue(createImageRel32(HandlerMapXData), 4); // HandlerArray + OS.EmitValue(create32bitRef(HandlerMapXData), 4); // HandlerArray } for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { @@ -460,12 +419,6 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) { // }; OS.EmitLabel(HandlerMapXData); for (const WinEHHandlerType &HT : TBME.HandlerArray) { - MCSymbol *ParentFrameOffset = - Asm->OutContext.getOrCreateParentFrameOffsetSymbol( - GlobalValue::getRealLinkageName(HT.Handler->getName())); - const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::Create( - ParentFrameOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); - // Get the frame escape label with the offset of the catch object. If // the index is -1, then there is no catch object, and we should emit an // offset of zero, indicating that no copy will occur. @@ -475,17 +428,25 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) { Asm->OutContext.getOrCreateFrameAllocSymbol( GlobalValue::getRealLinkageName(ParentF->getName()), HT.CatchObjRecoverIdx); - FrameAllocOffsetRef = MCSymbolRefExpr::Create( + FrameAllocOffsetRef = MCSymbolRefExpr::create( FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); } else { - FrameAllocOffsetRef = MCConstantExpr::Create(0, Asm->OutContext); + FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext); } OS.EmitIntValue(HT.Adjectives, 4); // Adjectives - OS.EmitValue(createImageRel32(HT.TypeDescriptor), 4); // Type + OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); // Type OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset - OS.EmitValue(createImageRel32(HT.Handler), 4); // Handler - OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset + OS.EmitValue(create32bitRef(HT.Handler), 4); // Handler + + if (shouldEmitPersonality) { + MCSymbol *ParentFrameOffset = + Asm->OutContext.getOrCreateParentFrameOffsetSymbol( + GlobalValue::getRealLinkageName(HT.Handler->getName())); + const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::create( + ParentFrameOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); + OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset + } } } } @@ -497,8 +458,86 @@ void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) { if (IPToStateXData) { OS.EmitLabel(IPToStateXData); for (auto &IPStatePair : FuncInfo.IPToStateList) { - OS.EmitValue(createImageRel32(IPStatePair.first), 4); // IP + OS.EmitValue(create32bitRef(IPStatePair.first), 4); // IP OS.EmitIntValue(IPStatePair.second, 4); // State } } } + +void WinException::extendIP2StateTable(const MachineFunction *MF, + const Function *ParentF, + WinEHFuncInfo &FuncInfo) { + const Function *F = MF->getFunction(); + + // The Itanium LSDA table sorts similar landing pads together to simplify the + // actions table, but we don't need that. + SmallVector<const LandingPadInfo *, 64> LandingPads; + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + LandingPads.reserve(PadInfos.size()); + for (const auto &LP : PadInfos) + LandingPads.push_back(&LP); + + RangeMapType PadMap; + computePadMap(LandingPads, PadMap); + + // The end label of the previous invoke or nounwind try-range. 
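Putting the EmitIntValue/EmitValue sequence above back together, the $cppxdata$ FuncInfo record has roughly this shape (a sketch; the reference fields are imagerel32 on 64-bit targets and plain 32-bit values otherwise, and UnwindHelp is present only when Windows CFI is in use):

#include <stdint.h>

struct FuncInfo32 {
  uint32_t MagicNumber;   // 0x19930522
  uint32_t MaxState;      // UnwindMap.size()
  uint32_t UnwindMap;     // reference to the unwind map, or 0
  uint32_t NumTryBlocks;  // TryBlockMap.size()
  uint32_t TryBlockMap;   // reference to the try-block map, or 0
  uint32_t IPMapEntries;  // IPToStateList.size(); always 0 for x86
  uint32_t IPToStateMap;  // reference to the ip2state table; always 0 for x86
  uint32_t UnwindHelp;    // frame offset; omitted without Windows CFI
  uint32_t ESTypeList;    // 0
  int32_t  EHFlags;       // 1
};

The ip2state entries referenced here are (IP, state) pairs: the EH state of a faulting address is the state recorded by the last entry at or before it, which is the invariant the extendIP2StateTable code above and below maintains.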
+ MCSymbol *LastLabel = Asm->getFunctionBegin(); + + // Whether there is a potentially throwing instruction (currently this means + // an ordinary call) between the end of the previous try-range and now. + bool SawPotentiallyThrowing = false; + + int LastEHState = -2; + + // The parent function and the catch handlers contribute to the 'ip2state' + // table. + + // Include ip2state entries for the beginning of the main function and + // for catch handler functions. + if (F == ParentF) { + FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); + LastEHState = -1; + } else if (FuncInfo.HandlerBaseState.count(F)) { + FuncInfo.IPToStateList.push_back( + std::make_pair(LastLabel, FuncInfo.HandlerBaseState[F])); + LastEHState = FuncInfo.HandlerBaseState[F]; + } + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (!MI.isEHLabel()) { + if (MI.isCall()) + SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); + continue; + } + + // End of the previous try-range? + MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol(); + if (BeginLabel == LastLabel) + SawPotentiallyThrowing = false; + + // Beginning of a new try-range? + RangeMapType::const_iterator L = PadMap.find(BeginLabel); + if (L == PadMap.end()) + // Nope, it was just some random label. + continue; + + const PadRange &P = L->second; + const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; + assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && + "Inconsistent landing pad map!"); + + // FIXME: Should this be using FuncInfo.HandlerBaseState? + if (SawPotentiallyThrowing && LastEHState != -1) { + FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); + SawPotentiallyThrowing = false; + LastEHState = -1; + } + + if (LandingPad->WinEHState != LastEHState) + FuncInfo.IPToStateList.push_back( + std::make_pair(BeginLabel, LandingPad->WinEHState)); + LastEHState = LandingPad->WinEHState; + LastLabel = LandingPad->EndLabels[P.RangeIndex]; + } + } +} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/WinException.h index 5f4237fc0152..478899b79da9 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.h +++ b/lib/CodeGen/AsmPrinter/WinException.h @@ -1,4 +1,4 @@ -//===-- Win64Exception.h - Windows Exception Handling ----------*- C++ -*--===// +//===-- WinException.h - Windows Exception Handling ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -17,33 +17,41 @@ #include "EHStreamer.h" namespace llvm { +class Function; class GlobalValue; class MachineFunction; class MCExpr; +struct WinEHFuncInfo; -class Win64Exception : public EHStreamer { +class WinException : public EHStreamer { /// Per-function flag to indicate if personality info should be emitted. - bool shouldEmitPersonality; + bool shouldEmitPersonality = false; /// Per-function flag to indicate if the LSDA should be emitted. - bool shouldEmitLSDA; + bool shouldEmitLSDA = false; /// Per-function flag to indicate if frame moves info should be emitted. - bool shouldEmitMoves; + bool shouldEmitMoves = false; + + /// True if this is a 64-bit target and we should use image relative offsets. 
+ bool useImageRel32 = false; void emitCSpecificHandlerTable(); void emitCXXFrameHandler3Table(const MachineFunction *MF); - const MCExpr *createImageRel32(const MCSymbol *Value); - const MCExpr *createImageRel32(const GlobalValue *GV); + void extendIP2StateTable(const MachineFunction *MF, const Function *ParentF, + WinEHFuncInfo &FuncInfo); + + const MCExpr *create32bitRef(const MCSymbol *Value); + const MCExpr *create32bitRef(const GlobalValue *GV); public: //===--------------------------------------------------------------------===// // Main entry points. // - Win64Exception(AsmPrinter *A); - ~Win64Exception() override; + WinException(AsmPrinter *A); + ~WinException() override; /// Emit all exception information that should come after the content. void endModule() override; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 9fc3e0bcec9a..6d2af9003509 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -71,6 +71,7 @@ add_llvm_library(LLVMCodeGen MachineSink.cpp MachineTraceMetrics.cpp MachineVerifier.cpp + MIRPrintingPass.cpp OcamlGC.cpp OptimizePHIs.cpp PHIElimination.cpp @@ -129,3 +130,4 @@ add_dependencies(LLVMCodeGen intrinsics_gen) add_subdirectory(SelectionDAG) add_subdirectory(AsmPrinter) +add_subdirectory(MIRParser) diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 2c1858b944c6..6a814038c688 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -170,7 +170,8 @@ class TypePromotionTransaction; void EliminateMostlyEmptyBlock(BasicBlock *BB); bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT); bool OptimizeInst(Instruction *I, bool& ModifiedDT); - bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); + bool OptimizeMemoryInst(Instruction *I, Value *Addr, + Type *AccessTy, unsigned AS); bool OptimizeInlineAsmInst(CallInst *CS); bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT); bool MoveExtToFormExtLoad(Instruction *&I); @@ -1410,11 +1411,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { } if (TLI) { + // Unknown address space. + // TODO: Target hook to pick which address space the intrinsic cares + // about? + unsigned AddrSpace = ~0u; SmallVector<Value*, 2> PtrOps; Type *AccessTy; - if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy)) + if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace)) while (!PtrOps.empty()) - if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy)) + if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace)) return true; } } @@ -2095,6 +2100,7 @@ class AddressingModeMatcher { /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and /// the memory instruction that we're computing this address for. Type *AccessTy; + unsigned AddrSpace; Instruction *MemoryInst; /// AddrMode - This is the addressing mode that we're building up. 
This is @@ -2114,14 +2120,15 @@ class AddressingModeMatcher { bool IgnoreProfitability; AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI, - const TargetMachine &TM, Type *AT, Instruction *MI, - ExtAddrMode &AM, const SetOfInstrs &InsertedTruncs, + const TargetMachine &TM, Type *AT, unsigned AS, + Instruction *MI, ExtAddrMode &AM, + const SetOfInstrs &InsertedTruncs, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT) : AddrModeInsts(AMI), TM(TM), TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent()) ->getTargetLowering()), - AccessTy(AT), MemoryInst(MI), AddrMode(AM), + AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) { IgnoreProfitability = false; } @@ -2135,7 +2142,7 @@ public: /// optimizations. /// \p PromotedInsts maps the instructions to their type before promotion. /// \p The ongoing transaction where every action should be registered. - static ExtAddrMode Match(Value *V, Type *AccessTy, + static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst, SmallVectorImpl<Instruction*> &AddrModeInsts, const TargetMachine &TM, @@ -2144,7 +2151,7 @@ public: TypePromotionTransaction &TPT) { ExtAddrMode Result; - bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, + bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS, MemoryInst, Result, InsertedTruncs, PromotedInsts, TPT).MatchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -2190,7 +2197,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, TestAddrMode.ScaledReg = ScaleReg; // If the new address isn't legal, bail out. - if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) + if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace)) return false; // It was legal, so commit it. @@ -2207,7 +2214,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, // If this addressing mode is legal, commit it and remember that we folded // this instruction. - if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) { + if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace)) { AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); AddrMode = TestAddrMode; return true; @@ -2771,7 +2778,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, // just add it to the disp field and check validity. if (VariableOperand == -1) { AddrMode.BaseOffs += ConstantOffset; - if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ + if (ConstantOffset == 0 || + TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) { // Check to see if we can fold the base pointer in too. if (MatchAddr(AddrInst->getOperand(0), Depth+1)) return true; @@ -2894,14 +2902,14 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { // Fold in immediates if legal for the target. AddrMode.BaseOffs += CI->getSExtValue(); - if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) return true; AddrMode.BaseOffs -= CI->getSExtValue(); } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { // If this is a global variable, try to fold it into the addressing mode. 
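These CodeGenPrepare hunks thread a new address-space argument through to TargetLowering::isLegalAddressingMode, since what counts as a legal addressing mode can differ per address space. The query the matcher keeps issuing has this shape (a sketch with hypothetical values; TLI and AccessTy are assumed to be in scope, and the AM fields come from TargetLowering::AddrMode):

// The matcher builds candidate decompositions of the form
//   BaseGV + BaseReg + Scale * ScaledReg + BaseOffs
// and asks the target whether that form is directly encodable for an
// access of type AccessTy in the given address space.
TargetLowering::AddrMode AM;
AM.HasBaseReg = true;  // a base register is in use
AM.BaseOffs = 16;      // plus a constant displacement
AM.Scale = 4;          // plus 4 * index register
bool Legal = TLI.isLegalAddressingMode(AM, AccessTy, /*AddrSpace=*/0);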
if (!AddrMode.BaseGV) { AddrMode.BaseGV = GV; - if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) return true; AddrMode.BaseGV = nullptr; } @@ -2945,7 +2953,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { AddrMode.HasBaseReg = true; AddrMode.BaseReg = Addr; // Still check for legality in case the target supports [imm] but not [i+r]. - if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) return true; AddrMode.HasBaseReg = false; AddrMode.BaseReg = nullptr; @@ -2955,7 +2963,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { if (AddrMode.Scale == 0) { AddrMode.Scale = 1; AddrMode.ScaledReg = Addr; - if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) return true; AddrMode.Scale = 0; AddrMode.ScaledReg = nullptr; @@ -3136,9 +3144,11 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // Get the access type of this use. If the use isn't a pointer, we don't // know what it accesses. Value *Address = User->getOperand(OpNo); - if (!Address->getType()->isPointerTy()) + PointerType *AddrTy = dyn_cast<PointerType>(Address->getType()); + if (!AddrTy) return false; - Type *AddressAccessTy = Address->getType()->getPointerElementType(); + Type *AddressAccessTy = AddrTy->getElementType(); + unsigned AS = AddrTy->getAddressSpace(); // Do a match against the root of this address, ignoring profitability. This // will tell us if the addressing mode for the memory operation will @@ -3146,7 +3156,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode Result; TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS, MemoryInst, Result, InsertedTruncs, PromotedInsts, TPT); Matcher.IgnoreProfitability = true; @@ -3189,7 +3199,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { /// This method is used to optimize both load/store and inline asms with memory /// operands. bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, - Type *AccessTy) { + Type *AccessTy, unsigned AddrSpace) { Value *Repl = Addr; // Try to collapse single-value PHI nodes. This is necessary to undo @@ -3229,8 +3239,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // For non-PHIs, determine the addressing mode being computed. SmallVector<Instruction*, 16> NewAddrModeInsts; ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( - V, AccessTy, MemoryInst, NewAddrModeInsts, *TM, InsertedTruncsSet, - PromotedInsts, TPT); + V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM, + InsertedTruncsSet, PromotedInsts, TPT); // This check is broken into two cases with very similar code to avoid using // getNumUses() as much as possible. 
Some values have a lot of uses, so @@ -3545,7 +3555,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.isIndirect) { Value *OpVal = CS->getArgOperand(ArgNo++); - MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType()); + MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); } else if (OpInfo.Type == InlineAsm::isInput) ArgNo++; } @@ -4394,15 +4404,19 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { return OptimizeCmpExpression(CI); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (TLI) - return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); + if (TLI) { + unsigned AS = LI->getPointerAddressSpace(); + return OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); + } return false; } if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (TLI) + if (TLI) { + unsigned AS = SI->getPointerAddressSpace(); return OptimizeMemoryInst(I, SI->getOperand(1), - SI->getOperand(0)->getType()); + SI->getOperand(0)->getType(), AS); + } return false; } diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 3d62d4887602..dba280fd5aa2 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -71,7 +71,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // all callee-saved registers. In non-return this is any // callee-saved register that is not saved in the prolog. const MachineFrameInfo *MFI = MF.getFrameInfo(); - BitVector Pristine = MFI->getPristineRegs(BB); + BitVector Pristine = MFI->getPristineRegs(MF); for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { if (!IsReturnBlock && !Pristine.test(*I)) continue; for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 092b7f804e4f..d3687b98b344 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -226,21 +226,21 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { } // Check for any dependencies on Head instructions. - for (MIOperands MO(I); MO.isValid(); ++MO) { - if (MO->isRegMask()) { + for (const MachineOperand &MO : I->operands()) { + if (MO.isRegMask()) { DEBUG(dbgs() << "Won't speculate regmask: " << *I); return false; } - if (!MO->isReg()) + if (!MO.isReg()) continue; - unsigned Reg = MO->getReg(); + unsigned Reg = MO.getReg(); // Remember clobbered regunits. - if (MO->isDef() && TargetRegisterInfo::isPhysicalRegister(Reg)) + if (MO.isDef() && TargetRegisterInfo::isPhysicalRegister(Reg)) for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) ClobberedRegUnits.set(*Units); - if (!MO->readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg)) + if (!MO.readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg)) continue; MachineInstr *DefMI = MRI->getVRegDef(Reg); if (!DefMI || DefMI->getParent() != Head) @@ -284,19 +284,19 @@ bool SSAIfConv::findInsertionPoint() { } // Update live regunits. - for (MIOperands MO(I); MO.isValid(); ++MO) { + for (const MachineOperand &MO : I->operands()) { // We're ignoring regmask operands. That is conservatively correct. - if (!MO->isReg()) + if (!MO.isReg()) continue; - unsigned Reg = MO->getReg(); + unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // I clobbers Reg, so it isn't live before I. 
- if (MO->isDef()) + if (MO.isDef()) for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) LiveRegUnits.erase(*Units); // Unless I reads Reg. - if (MO->readsReg()) + if (MO.readsReg()) Reads.push_back(Reg); } // Anything read by I is live before I. diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 79de17567f76..37b3bf17ed1f 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -124,6 +124,12 @@ namespace { // for more information. unsigned MaxOffset; + /// Whether we should try to optimize for size only. + /// Currently, this applies a dead simple heuristic: only consider globals + /// used in minsize functions for merging. + /// FIXME: This could learn about optsize, and be used in the cost model. + bool OnlyOptimizeForSize; + bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; /// \brief Merge everything in \p Globals for which the corresponding bit @@ -152,9 +158,10 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. explicit GlobalMerge(const TargetMachine *TM = nullptr, - unsigned MaximalOffset = 0) + unsigned MaximalOffset = 0, + bool OnlyOptimizeForSize = false) : FunctionPass(ID), TM(TM), DL(TM->getDataLayout()), - MaxOffset(MaximalOffset) { + MaxOffset(MaximalOffset), OnlyOptimizeForSize(OnlyOptimizeForSize) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -273,6 +280,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, // users, so look through ConstantExpr... Use *UI, *UE; if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) { + if (CE->use_empty()) + continue; UI = &*CE->use_begin(); UE = nullptr; } else if (isa<Instruction>(U.getUser())) { @@ -290,6 +299,12 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, continue; Function *ParentFn = I->getParent()->getParent(); + + // If we're only optimizing for size, ignore non-minsize functions. 
+ if (OnlyOptimizeForSize && + !ParentFn->hasFnAttribute(Attribute::MinSize)) + continue; + size_t UGSIdx = GlobalUsesByFunction[ParentFn]; // If this is the first global the basic block uses, map it to the set @@ -585,6 +600,7 @@ bool GlobalMerge::doFinalization(Module &M) { return false; } -Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset) { - return new GlobalMerge(TM, Offset); +Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset, + bool OnlyOptimizeForSize) { + return new GlobalMerge(TM, Offset, OnlyOptimizeForSize); } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 0d59c72cb8f7..e861ceb2a664 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -170,9 +170,12 @@ namespace { bool PreRegAlloc; bool MadeChange; int FnNum; + std::function<bool(const Function &)> PredicateFtor; + public: static char ID; - IfConverter() : MachineFunctionPass(ID), FnNum(-1) { + IfConverter(std::function<bool(const Function &)> Ftor = nullptr) + : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(Ftor) { initializeIfConverterPass(*PassRegistry::getPassRegistry()); } @@ -270,6 +273,9 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { + if (PredicateFtor && !PredicateFtor(*MF.getFunction())) + return false; + const TargetSubtargetInfo &ST = MF.getSubtarget(); TLI = ST.getTargetLowering(); TII = ST.getInstrInfo(); @@ -1691,3 +1697,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { ToBBI.IsAnalyzed = false; FromBBI.IsAnalyzed = false; } + +FunctionPass * +llvm::createIfConverter(std::function<bool(const Function &)> Ftor) { + return new IfConverter(Ftor); +} diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt index fee0347ea659..05905d04dabf 100644 --- a/lib/CodeGen/LLVMBuild.txt +++ b/lib/CodeGen/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AsmPrinter SelectionDAG +subdirectories = AsmPrinter SelectionDAG MIRParser [component_0] type = Library diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 610c9f47bac7..ff5205801bc4 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -150,12 +150,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( return true; if (StopAfter) { - // FIXME: The intent is that this should eventually write out a YAML file, - // containing the LLVM IR, the machine-level IR (when stopping after a - // machine-level pass), and whatever other information is needed to - // deserialize the code and resume compilation. For now, just write the - // LLVM IR. 
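Two of the hunks above add opt-in knobs at pass-construction time: GlobalMerge can now restrict itself to globals used from minsize functions, and the if-converter accepts a predicate deciding which functions it runs on. Hypothetical target-setup code exercising both (not taken from the patch; PM is an assumed PassManagerBase and TM the target machine):

// Only merge globals used from minsize functions, and skip if-conversion
// for those same functions.
PM.add(llvm::createGlobalMergePass(TM, /*Offset=*/0,
                                   /*OnlyOptimizeForSize=*/true));
PM.add(llvm::createIfConverter(
    [](const Function &F) { return !F.hasFnAttribute(Attribute::MinSize); }));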
- PM.add(createPrintModulePass(Out)); + PM.add(createPrintMIRPass(outs())); return false; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index adca4cc738e1..c00b010e763b 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -223,11 +223,11 @@ void LiveIntervals::computeRegMasks() { RMB.first = RegMaskSlots.size(); for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end(); MI != ME; ++MI) - for (MIOperands MO(MI); MO.isValid(); ++MO) { - if (!MO->isRegMask()) + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isRegMask()) continue; RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot()); - RegMaskBits.push_back(MO->getRegMask()); + RegMaskBits.push_back(MO.getRegMask()); } // Compute the number of register mask instructions in this block. RMB.second = RegMaskSlots.size() - RMB.first; @@ -927,23 +927,23 @@ public: void updateAllRanges(MachineInstr *MI) { DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI); bool hasRegMask = false; - for (MIOperands MO(MI); MO.isValid(); ++MO) { - if (MO->isRegMask()) + for (MachineOperand &MO : MI->operands()) { + if (MO.isRegMask()) hasRegMask = true; - if (!MO->isReg()) + if (!MO.isReg()) continue; // Aggressively clear all kill flags. // They are reinserted by VirtRegRewriter. - if (MO->isUse()) - MO->setIsKill(false); + if (MO.isUse()) + MO.setIsKill(false); - unsigned Reg = MO->getReg(); + unsigned Reg = MO.getReg(); if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) { LiveInterval &LI = LIS.getInterval(Reg); if (LI.hasSubRanges()) { - unsigned SubReg = MO->getSubReg(); + unsigned SubReg = MO.getSubReg(); unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg); for (LiveInterval::SubRange &S : LI.subranges()) { if ((S.LaneMask & LaneMask) == 0) diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 27c57d5f3029..08bbe0c3f379 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -218,6 +218,22 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, return true; } +bool LiveRangeEdit::useIsKill(const LiveInterval &LI, + const MachineOperand &MO) const { + const MachineInstr *MI = MO.getParent(); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (LI.Query(Idx).isKill()) + return true; + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + unsigned SubReg = MO.getSubReg(); + unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg); + for (const LiveInterval::SubRange &S : LI.subranges()) { + if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill()) + return true; + } + return false; +} + /// Find all live intervals that need to shrink, then remove the instruction. void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { assert(MI->allDefsAreDead() && "Def isn't really dead"); @@ -266,9 +282,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { // unlikely to change anything. We typically don't want to shrink the // PIC base register that has lots of uses everywhere. // Always shrink COPY uses that probably come from live range splitting. - if (MI->readsVirtualRegister(Reg) && - (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || - LI.Query(Idx).isKill())) + if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MOI->isDef())) || + (MOI->readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, *MOI)))) ToShrink.insert(&LI); // Remove defined value. 
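The files that follow add the MIRParser library that consumes what the createPrintMIRPass hunk above emits: a sequence of YAML documents, optionally beginning with the LLVM IR as a block scalar. A hypothetical end-to-end use; the exact YAML keys are still settling at this revision, so the sample input is illustrative only:

#include "llvm/CodeGen/MIRParser/MIRParser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
using namespace llvm;

int main() {
  // First document: the embedded LLVM IR. Second document: one (still
  // nearly empty) machine function record.
  StringRef Buf = "--- |\n"
                  "  define void @f() {\n"
                  "    ret void\n"
                  "  }\n"
                  "...\n"
                  "---\n"
                  "name: f\n"
                  "...\n";
  LLVMContext Context;
  SMDiagnostic Err;
  std::unique_ptr<Module> M =
      parseMIR(MemoryBuffer::getMemBuffer(Buf, "sample.mir"), Err, Context);
  if (!M) {
    Err.print("mir-demo", errs());
    return 1;
  }
  return 0;
}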
diff --git a/lib/CodeGen/MIRParser/CMakeLists.txt b/lib/CodeGen/MIRParser/CMakeLists.txt new file mode 100644 index 000000000000..468f072ed7f3 --- /dev/null +++ b/lib/CodeGen/MIRParser/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_library(LLVMMIRParser + MIRParser.cpp + ) + +add_dependencies(LLVMMIRParser intrinsics_gen) diff --git a/lib/CodeGen/MIRParser/LLVMBuild.txt b/lib/CodeGen/MIRParser/LLVMBuild.txt new file mode 100644 index 000000000000..04ae72290f93 --- /dev/null +++ b/lib/CodeGen/MIRParser/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/CodeGen/MIRParser/LLVMBuild.txt --------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MIRParser +parent = CodeGen +required_libraries = Core Support Target AsmParser CodeGen diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp new file mode 100644 index 000000000000..7a51b3881afc --- /dev/null +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -0,0 +1,171 @@ +//===- MIRParser.cpp - MIR serialization format parser implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the class that parses the optional LLVM IR and machine +// functions that are stored in MIR files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MIRParser/MIRParser.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/YAMLTraits.h" +#include <memory> + +using namespace llvm; + +namespace { + +/// This class implements the parsing of LLVM IR that's embedded inside a MIR +/// file. +class MIRParserImpl { + SourceMgr SM; + StringRef Filename; + LLVMContext &Context; + +public: + MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename, + LLVMContext &Context); + + /// Try to parse the optional LLVM module and the machine functions in the MIR + /// file. + /// + /// Return null if an error occurred. + std::unique_ptr<Module> parse(SMDiagnostic &Error); + + /// Parse the machine function in the current YAML document. + /// + /// Return true if an error occurred. + bool parseMachineFunction(yaml::Input &In); + +private: + /// Return a MIR diagnostic converted from an LLVM assembly diagnostic. 
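The helper documented just above (declared and defined below) remaps diagnostics from the embedded LLVM IR string back to coordinates in the MIR file: the IR lives inside a YAML block scalar, so a reported error has to be shifted by the scalar's starting line and indentation. A worked sketch of the line arithmetic, with a helper name of our own choosing:

// If the block scalar starts on MIR line 3 and the IR parser reports an
// error on line 2 of the embedded string, the MIR line is 3 + 2 - 1 = 4;
// the column is additionally shifted right by the scalar's indentation.
unsigned mirLine(unsigned BlockScalarStartLine, unsigned IRErrorLine) {
  return BlockScalarStartLine + IRErrorLine - 1;
}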
+ SMDiagnostic diagFromLLVMAssemblyDiag(const SMDiagnostic &Error, + SMRange SourceRange); +}; + +} // end anonymous namespace + +MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, + StringRef Filename, LLVMContext &Context) + : SM(), Filename(Filename), Context(Context) { + SM.AddNewSourceBuffer(std::move(Contents), SMLoc()); +} + +static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { + *reinterpret_cast<SMDiagnostic *>(Context) = Diag; +} + +std::unique_ptr<Module> MIRParserImpl::parse(SMDiagnostic &Error) { + yaml::Input In(SM.getMemoryBuffer(SM.getMainFileID())->getBuffer(), + /*Ctxt=*/nullptr, handleYAMLDiag, &Error); + + if (!In.setCurrentDocument()) { + if (!Error.getMessage().empty()) + return nullptr; + // Create an empty module when the MIR file is empty. + return llvm::make_unique<Module>(Filename, Context); + } + + std::unique_ptr<Module> M; + // Parse the block scalar manually so that we can return a unique pointer + // without having to go through YAML traits. + if (const auto *BSN = + dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) { + M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error, + Context); + if (!M) { + Error = diagFromLLVMAssemblyDiag(Error, BSN->getSourceRange()); + return M; + } + In.nextDocument(); + if (!In.setCurrentDocument()) + return M; + } else { + // Create a new, empty module. + M = llvm::make_unique<Module>(Filename, Context); + } + + // Parse the machine functions. + do { + if (parseMachineFunction(In)) + return nullptr; + In.nextDocument(); + } while (In.setCurrentDocument()); + + return M; +} + +bool MIRParserImpl::parseMachineFunction(yaml::Input &In) { + yaml::MachineFunction MF; + yaml::yamlize(In, MF, false); + if (In.error()) + return true; + // TODO: Initialize the real machine function with the state in the yaml + // machine function later on. + return false; +} + +SMDiagnostic MIRParserImpl::diagFromLLVMAssemblyDiag(const SMDiagnostic &Error, + SMRange SourceRange) { + assert(SourceRange.isValid()); + + // Translate the location of the error from the location in the LLVM IR string + // to the corresponding location in the MIR file. + auto LineAndColumn = SM.getLineAndColumn(SourceRange.Start); + unsigned Line = LineAndColumn.first + Error.getLineNo() - 1; + unsigned Column = Error.getColumnNo(); + StringRef LineStr = Error.getLineContents(); + SMLoc Loc = Error.getLoc(); + + // Get the full line and adjust the column number by taking the indentation of + // LLVM IR into account.
+ for (line_iterator L(*SM.getMemoryBuffer(SM.getMainFileID()), false), E; + L != E; ++L) { + if (L.line_number() == Line) { + LineStr = *L; + Loc = SMLoc::getFromPointer(LineStr.data()); + auto Indent = LineStr.find(Error.getLineContents()); + if (Indent != StringRef::npos) + Column += Indent; + break; + } + } + + return SMDiagnostic(SM, Loc, Filename, Line, Column, Error.getKind(), + Error.getMessage(), LineStr, Error.getRanges(), + Error.getFixIts()); +} + +std::unique_ptr<Module> llvm::parseMIRFile(StringRef Filename, + SMDiagnostic &Error, + LLVMContext &Context) { + auto FileOrErr = MemoryBuffer::getFile(Filename); + if (std::error_code EC = FileOrErr.getError()) { + Error = SMDiagnostic(Filename, SourceMgr::DK_Error, + "Could not open input file: " + EC.message()); + return std::unique_ptr<Module>(); + } + return parseMIR(std::move(FileOrErr.get()), Error, Context); +} + +std::unique_ptr<Module> llvm::parseMIR(std::unique_ptr<MemoryBuffer> Contents, + SMDiagnostic &Error, + LLVMContext &Context) { + auto Filename = Contents->getBufferIdentifier(); + MIRParserImpl Parser(std::move(Contents), Filename, Context); + return Parser.parse(Error); +} diff --git a/lib/CodeGen/MIRParser/Makefile b/lib/CodeGen/MIRParser/Makefile new file mode 100644 index 000000000000..c02d18806a9c --- /dev/null +++ b/lib/CodeGen/MIRParser/Makefile @@ -0,0 +1,13 @@ +##===- lib/CodeGen/MIRParser/Makefile ----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMMIRParser + +include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp new file mode 100644 index 000000000000..5e0f4cdcbfde --- /dev/null +++ b/lib/CodeGen/MIRPrintingPass.cpp @@ -0,0 +1,109 @@ +//===- MIRPrintingPass.cpp - Pass that prints out using the MIR format ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass that prints out the LLVM module using the MIR +// serialization format. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/YAMLTraits.h" + +using namespace llvm; + +namespace llvm { +namespace yaml { + +/// This struct serializes the LLVM IR module. +template <> struct BlockScalarTraits<Module> { + static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) { + Mod.print(OS, nullptr); + } + static StringRef input(StringRef Str, void *Ctxt, Module &Mod) { + llvm_unreachable("LLVM Module is supposed to be parsed separately"); + return ""; + } +}; + +} // end namespace yaml +} // end namespace llvm + +namespace { + +/// This class prints out the machine functions using the MIR serialization +/// format. 
+class MIRPrinter { + raw_ostream &OS; + +public: + MIRPrinter(raw_ostream &OS) : OS(OS) {} + + void print(const MachineFunction &MF); +}; + +void MIRPrinter::print(const MachineFunction &MF) { + yaml::MachineFunction YamlMF; + YamlMF.Name = MF.getName(); + yaml::Output Out(OS); + Out << YamlMF; +} + +/// This pass prints out the LLVM IR to an output stream using the MIR +/// serialization format. +struct MIRPrintingPass : public MachineFunctionPass { + static char ID; + raw_ostream &OS; + std::string MachineFunctions; + + MIRPrintingPass() : MachineFunctionPass(ID), OS(dbgs()) {} + MIRPrintingPass(raw_ostream &OS) : MachineFunctionPass(ID), OS(OS) {} + + const char *getPassName() const override { return "MIR Printing Pass"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnMachineFunction(MachineFunction &MF) override { + std::string Str; + raw_string_ostream StrOS(Str); + MIRPrinter(StrOS).print(MF); + MachineFunctions.append(StrOS.str()); + return false; + } + + virtual bool doFinalization(Module &M) override { + yaml::Output Out(OS); + Out << M; + OS << MachineFunctions; + return false; + } +}; + +char MIRPrintingPass::ID = 0; + +} // end anonymous namespace + +char &llvm::MIRPrintingPassID = MIRPrintingPass::ID; +INITIALIZE_PASS(MIRPrintingPass, "mir-printer", "MIR Printer", false, false) + +namespace llvm { + +MachineFunctionPass *createPrintMIRPass(raw_ostream &OS) { + return new MIRPrintingPass(OS); +} + +} // end namespace llvm diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 43c80b7c21bf..a6863412132b 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -54,7 +54,6 @@ namespace { SourceMap &SrcMap, DenseMap<unsigned, MachineInstr*> &AvailCopyMap); bool CopyPropagateBlock(MachineBasicBlock &MBB); - void removeCopy(MachineInstr *MI); }; } char MachineCopyPropagation::ID = 0; @@ -127,13 +126,6 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, return false; } -// Remove MI from the function because it has been determined it is dead. -// Turn it into a noop KILL instruction as opposed to removing it to -// maintain imp-use/imp-def chains. -void MachineCopyPropagation::removeCopy(MachineInstr *MI) { - MI->setDesc(TII->get(TargetOpcode::KILL)); -} - bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map @@ -183,7 +175,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I) I->clearRegisterKills(Def, TRI); - removeCopy(MI); + MI->eraseFromParent(); Changed = true; ++NumDeletes; continue; @@ -252,11 +244,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { report_fatal_error("MachineCopyPropagation should be run after" " register allocation!"); - // Treat undef use like defs. - // The backends are allowed to do whatever they want with undef value - // and we cannot be sure this register will not be rewritten to break - // some false dependencies for the hardware for instance. 
- if (MO.isDef() || MO.isUndef()) { + if (MO.isDef()) { Defs.push_back(Reg); continue; } @@ -270,6 +258,14 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { MaybeDeadCopies.remove(CI->second); } } + // Treat undef use like defs for copy propagation but not for + // dead copy. We would need to do a liveness check to be sure the copy + // is dead for undef uses. + // The backends are allowed to do whatever they want with undef value + // and we cannot be sure this register will not be rewritten to break + // some false dependencies for the hardware for instance. + if (MO.isUndef()) + Defs.push_back(Reg); } // The instruction has a register mask operand which means that it clobbers @@ -287,7 +283,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { continue; DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: "; (*DI)->dump()); - removeCopy(*DI); + (*DI)->eraseFromParent(); Changed = true; ++NumDeletes; } @@ -323,7 +319,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); DI != DE; ++DI) { if (!MRI->isReserved((*DI)->getOperand(0).getReg())) { - removeCopy(*DI); + (*DI)->eraseFromParent(); Changed = true; ++NumDeletes; } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 8ec63f823e17..09662b6e48d3 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -584,12 +584,8 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, return -++NumFixedObjects; } -BitVector -MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { - assert(MBB && "MBB must be valid"); - const MachineFunction *MF = MBB->getParent(); - assert(MF && "MBB must be part of a MachineFunction"); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); +BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); BitVector BV(TRI->getNumRegs()); // Before CSI is calculated, no registers are considered pristine. They can be @@ -597,14 +593,10 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { if (!isCalleeSavedInfoValid()) return BV; - for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) + for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) BV.set(*CSR); - // Each MBB before the save point has all CSRs pristine. - if (isBeforeSavePoint(*MF, *MBB)) - return BV; - - // On other MBBs the saved CSRs are not pristine. + // Saved CSRs are not pristine. const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo(); for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) @@ -613,40 +605,6 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { return BV; } -// Note: We could use some sort of caching mecanism, but we lack the ability -// to know when the cache is invalid, i.e., the CFG changed. -// Assuming we have that, we can simply compute all the set of MBBs -// that are before the save point. -bool MachineFrameInfo::isBeforeSavePoint(const MachineFunction &MF, - const MachineBasicBlock &MBB) const { - // Early exit if shrink-wrapping did not kick. - if (!Save) - return &MBB == &MF.front(); - - // Starting from MBB, check if there is a path leading to Save that do - // not cross Restore. 
- SmallPtrSet<const MachineBasicBlock *, 8> Visited; - SmallVector<const MachineBasicBlock *, 8> WorkList; - WorkList.push_back(&MBB); - Visited.insert(&MBB); - do { - const MachineBasicBlock *CurBB = WorkList.pop_back_val(); - // By construction, the region that is after the save point is - // dominated by the Save and post-dominated by the Restore. - // If we do not reach Restore and still reach Save, this - // means MBB is before Save. - if (CurBB == Save) - return true; - if (CurBB == Restore) - continue; - // Enqueue all the successors not already visited. - for (MachineBasicBlock *SuccBB : CurBB->successors()) - if (Visited.insert(SuccBB).second) - WorkList.push_back(SuccBB); - } while (!WorkList.empty()); - return false; -} - unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 205032f92971..e67102865bfa 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1092,9 +1092,8 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg( OpndIt.getOperandNo(), Reg, CurRC, TII, TRI); else // Otherwise, just check the current operands. - for (ConstMIOperands OpndIt(this); OpndIt.isValid() && CurRC; ++OpndIt) - CurRC = getRegClassConstraintEffectForVRegImpl(OpndIt.getOperandNo(), Reg, - CurRC, TII, TRI); + for (unsigned i = 0, e = NumOperands; i < e && CurRC; ++i) + CurRC = getRegClassConstraintEffectForVRegImpl(i, Reg, CurRC, TII, TRI); return CurRC; } diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index 0690f08d495b..cd820ee1ac52 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -23,11 +23,15 @@ namespace { class UnpackMachineBundles : public MachineFunctionPass { public: static char ID; // Pass identification - UnpackMachineBundles() : MachineFunctionPass(ID) { + UnpackMachineBundles(std::function<bool(const Function &)> Ftor = nullptr) + : MachineFunctionPass(ID), PredicateFtor(Ftor) { initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; + + private: + std::function<bool(const Function &)> PredicateFtor; }; } // end anonymous namespace @@ -37,6 +41,9 @@ INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles", "Unpack machine instruction bundles", false, false) bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { + if (PredicateFtor && !PredicateFtor(*MF.getFunction())) + return false; + bool Changed = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; @@ -69,6 +76,10 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { return Changed; } +FunctionPass * +llvm::createUnpackMachineBundles(std::function<bool(const Function &)> Ftor) { + return new UnpackMachineBundles(Ftor); +} namespace { class FinalizeMachineBundles : public MachineFunctionPass { diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 3967a2fbf8c8..cce590c6dc5b 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -1012,10 +1012,10 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { SmallVector<const MachineInstr*, 8> Work(1, MI); do { MI = Work.pop_back_val(); - for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { - if 
(!MO->isReg() || !MO->isDef()) + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO->getReg(); + unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index d9da7bcca4fa..eec984f53b90 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -114,7 +114,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) { // Otherwise, this is a new entry, create a new symbol for it and add an // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd. - BBCallbacks.push_back(BB); + BBCallbacks.emplace_back(BB); BBCallbacks.back().setMap(this); Entry.Index = BBCallbacks.size()-1; Entry.Fn = BB->getParent(); @@ -308,6 +308,7 @@ void MachineModuleInfo::EndFunction() { // Clean up exception info. LandingPads.clear(); + PersonalityTypeCache = EHPersonality::Unknown; CallSiteMap.clear(); TypeInfos.clear(); FilterIds.clear(); diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 5f03390f146c..aed0e500d441 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -655,6 +655,10 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (!MI->isSafeToMove(AA, SawStore)) return false; + // Convergent operations may only be moved to control equivalent locations. + if (MI->isConvergent()) + return false; + // FIXME: This should include support for sinking instructions within the // block they are currently in to shorten the live ranges. We often get // instructions sunk into the top of a large block, but it would be better to diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index e07250bf4f34..34ac9d5b0ed7 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -627,10 +627,12 @@ static bool getDataDeps(const MachineInstr *UseMI, SmallVectorImpl<DataDep> &Deps, const MachineRegisterInfo *MRI) { bool HasPhysRegs = false; - for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { - if (!MO->isReg()) + for (MachineInstr::const_mop_iterator I = UseMI->operands_begin(), + E = UseMI->operands_end(); I != E; ++I) { + const MachineOperand &MO = *I; + if (!MO.isReg()) continue; - unsigned Reg = MO->getReg(); + unsigned Reg = MO.getReg(); if (!Reg) continue; if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -638,8 +640,8 @@ static bool getDataDeps(const MachineInstr *UseMI, continue; } // Collect virtual register reads. - if (MO->readsReg()) - Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo())); + if (MO.readsReg()) + Deps.push_back(DataDep(MRI, Reg, UseMI->getOperandNo(I))); } return HasPhysRegs; } @@ -690,28 +692,30 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI, SmallVector<unsigned, 8> Kills; SmallVector<unsigned, 8> LiveDefOps; - for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { - if (!MO->isReg()) + for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(), + ME = UseMI->operands_end(); MI != ME; ++MI) { + const MachineOperand &MO = *MI; + if (!MO.isReg()) continue; - unsigned Reg = MO->getReg(); + unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // Track live defs and kills for updating RegUnits. 
- if (MO->isDef()) { - if (MO->isDead()) + if (MO.isDef()) { + if (MO.isDead()) Kills.push_back(Reg); else - LiveDefOps.push_back(MO.getOperandNo()); - } else if (MO->isKill()) + LiveDefOps.push_back(UseMI->getOperandNo(MI)); + } else if (MO.isKill()) Kills.push_back(Reg); // Identify dependencies. - if (!MO->readsReg()) + if (!MO.readsReg()) continue; for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); if (I == RegUnits.end()) continue; - Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo())); + Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI))); break; } } @@ -864,15 +868,18 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { SmallVector<unsigned, 8> ReadOps; - for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { - if (!MO->isReg()) + + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + const MachineOperand &MO = *MOI; + if (!MO.isReg()) continue; - unsigned Reg = MO->getReg(); + unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - if (MO->readsReg()) - ReadOps.push_back(MO.getOperandNo()); - if (!MO->isDef()) + if (MO.readsReg()) + ReadOps.push_back(MI->getOperandNo(MOI)); + if (!MO.isDef()) continue; // This is a def of Reg. Remove corresponding entries from RegUnits, and // update MI Height to consider the physreg dependencies. @@ -885,7 +892,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, // We may not know the UseMI of this dependency, if it came from the // live-in list. SchedModel can handle a NULL UseMI. DepHeight += SchedModel - .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op); + .computeOperandLatency(MI, MI->getOperandNo(MOI), I->MI, I->Op); } Height = std::max(Height, DepHeight); // This regunit is dead above MI. diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index f5edcb7393e4..ca35ec5fdcf8 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -694,7 +694,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { const MachineFrameInfo *MFI = MF->getFrameInfo(); assert(MFI && "Function has no frame info"); - BitVector PR = MFI->getPristineRegs(MBB); + BitVector PR = MFI->getPristineRegs(*MF); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) diff --git a/lib/CodeGen/Makefile b/lib/CodeGen/Makefile index 4ab3e3c0013e..96f7ca585138 100644 --- a/lib/CodeGen/Makefile +++ b/lib/CodeGen/Makefile @@ -9,7 +9,7 @@ LEVEL = ../.. LIBRARYNAME = LLVMCodeGen -PARALLEL_DIRS = SelectionDAG AsmPrinter +PARALLEL_DIRS = SelectionDAG AsmPrinter MIRParser BUILD_ARCHIVE = 1 include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 690224342f64..4cd86e66c0e8 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -295,6 +295,24 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { if (verifyAfter) addVerifyPass(Banner); } + + // Add the passes after the pass P if there is any. 
+ for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator + I = Impl->InsertedPasses.begin(), + E = Impl->InsertedPasses.end(); + I != E; ++I) { + if ((*I).first == PassID) { + assert((*I).second.isValid() && "Illegal Pass ID!"); + Pass *NP; + if ((*I).second.isInstance()) + NP = (*I).second.getInstance(); + else { + NP = Pass::createPass((*I).second.getID()); + assert(NP && "Pass ID not registered"); + } + addPass(NP, false, false); + } + } } else { delete P; } @@ -329,22 +347,6 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter, AnalysisID FinalID = P->getPassID(); addPass(P, verifyAfter, printAfter); // Ends the lifetime of P. - // Add the passes after the pass P if there is any. - for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator - I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end(); - I != E; ++I) { - if ((*I).first == PassID) { - assert((*I).second.isValid() && "Illegal Pass ID!"); - Pass *NP; - if ((*I).second.isInstance()) - NP = (*I).second.getInstance(); - else { - NP = Pass::createPass((*I).second.getID()); - assert(NP && "Pass ID not registered"); - } - addPass(NP, false, false); - } - } return FinalID; } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index b1538006e724..5f8194983484 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -68,8 +68,8 @@ bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) { !MI->isRegSequence() && !MI->isPHI()) return false; - for (MIOperands MO(MI); MO.isValid(); ++MO) - if (MO->isReg() && MO->isUse() && MO->readsReg()) + for (const MachineOperand &MO : MI->operands()) + if (MO.isReg() && MO.isUse() && MO.readsReg()) return false; return true; } @@ -100,17 +100,17 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end(); bool Found = false; for (++UserMI; UserMI != UserE; ++UserMI) { - for (MIOperands MO(UserMI); MO.isValid(); ++MO) { - if (!MO->isReg()) + for (MachineOperand &MO : UserMI->operands()) { + if (!MO.isReg()) continue; - unsigned UserReg = MO->getReg(); + unsigned UserReg = MO.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(UserReg) || !TRI->regsOverlap(Reg, UserReg)) continue; // UserMI uses or redefines Reg. Set <undef> flags on all uses. 
Found = true; - if (MO->isUse()) - MO->setIsUndef(); + if (MO.isUse()) + MO.setIsUndef(); } if (Found) break; diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index ac7d98f258b1..e513a4f1ccf5 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -1834,12 +1834,12 @@ public: unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const { unsigned L = 0; - for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) { - if (!MO->isReg() || MO->getReg() != Reg || !MO->isDef()) + for (const MachineOperand &MO : DefMI->operands()) { + if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef()) continue; L |= TRI->getSubRegIndexLaneMask( - TRI->composeSubRegIndices(SubIdx, MO->getSubReg())); - if (MO->readsReg()) + TRI->composeSubRegIndices(SubIdx, MO.getSubReg())); + if (MO.readsReg()) Redef = true; } return L; @@ -2224,13 +2224,13 @@ bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx, unsigned Lanes) const { if (MI->isDebugValue()) return false; - for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { - if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg) + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg) continue; - if (!MO->readsReg()) + if (!MO.readsReg()) continue; if (Lanes & TRI->getSubRegIndexLaneMask( - TRI->composeSubRegIndices(SubIdx, MO->getSubReg()))) + TRI->composeSubRegIndices(SubIdx, MO.getSubReg()))) return true; } return false; @@ -2339,11 +2339,11 @@ void JoinVals::pruneValues(JoinVals &Other, // Remove <def,read-undef> flags. This def is now a partial redef. // Also remove <def,dead> flags since the joined live range will // continue past this instruction. - for (MIOperands MO(Indexes->getInstructionFromIndex(Def)); - MO.isValid(); ++MO) { - if (MO->isReg() && MO->isDef() && MO->getReg() == Reg) { - MO->setIsUndef(EraseImpDef); - MO->setIsDead(false); + for (MachineOperand &MO : + Indexes->getInstructionFromIndex(Def)->operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { + MO.setIsUndef(EraseImpDef); + MO.setIsDead(false); } } } diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 667783e29526..450a3051c6ff 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -60,11 +60,11 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dumpRegSetPressure(MaxSetPressure, TRI); dbgs() << "Live In: "; for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) - dbgs() << PrintReg(LiveInRegs[i], TRI) << " "; + dbgs() << PrintVRegOrUnit(LiveInRegs[i], TRI) << " "; dbgs() << '\n'; dbgs() << "Live Out: "; for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i) - dbgs() << PrintReg(LiveOutRegs[i], TRI) << " "; + dbgs() << PrintVRegOrUnit(LiveOutRegs[i], TRI) << " "; dbgs() << '\n'; } diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 7626dd29c00a..a34bd6341d22 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -55,7 +55,8 @@ void RegScavenger::initRegState() { setRegUsed(*I); // Pristine CSRs are also unavailable. 
- BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB); + const MachineFunction &MF = *MBB->getParent(); + BitVector PR = MF.getFrameInfo()->getPristineRegs(MF); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) setRegUsed(I); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index c60c5183b429..e8e47b764dd2 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1106,25 +1106,25 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, MachineBasicBlock::instr_iterator Begin = MI; MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (Begin != End) { - for (MIOperands MO(--End); MO.isValid(); ++MO) { - if (!MO->isReg() || MO->isDef() || Reg != MO->getReg()) + for (MachineOperand &MO : (--End)->operands()) { + if (!MO.isReg() || MO.isDef() || Reg != MO.getReg()) continue; // DEBUG_VALUE nodes do not contribute to code generation and should // always be ignored. Failure to do so may result in trying to modify // KILL flags on DEBUG_VALUE nodes, which is distressing. - if (MO->isDebug()) + if (MO.isDebug()) continue; // If the register has the internal flag then it could be killing an // internal def of the register. In this case, just skip. We only want // to toggle the flag on operands visible outside the bundle. - if (MO->isInternalRead()) + if (MO.isInternalRead()) continue; - if (MO->isKill() == NewKillState) + if (MO.isKill() == NewKillState) continue; - MO->setIsKill(NewKillState); + MO.setIsKill(NewKillState); if (NewKillState) return; } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2c2dc8598169..a71c6761c75f 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -619,7 +619,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) - if (N0CFP->getValueAPF().isZero()) + if (N0CFP->isZero()) return Op.getOperand(1); // fold (fneg (fsub A, B)) -> (fsub B, A) @@ -1587,6 +1587,11 @@ static bool isNullConstant(SDValue V) { return Const != nullptr && Const->isNullValue(); } +static bool isNullFPConstant(SDValue V) { + ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); + return Const != nullptr && Const->isZero() && !Const->isNegative(); +} + static bool isAllOnesConstant(SDValue V) { ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); return Const != nullptr && Const->isAllOnesValue(); @@ -4764,7 +4769,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 - if (isa<ConstantSDNode>(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4774,7 +4779,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctlz_zero_undef c1) -> c2 - if (isa<ConstantSDNode>(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4784,7 +4789,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 - if (isa<ConstantSDNode>(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4794,7 +4799,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz_zero_undef c1) -> c2 - 
if (isa<ConstantSDNode>(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4804,7 +4809,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 - if (isa<ConstantSDNode>(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); return SDValue(); } @@ -7859,7 +7864,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { bool AllowNewConst = (Level < AfterLegalizeDAG); // fold (fadd A, 0) -> A - if (N1CFP && N1CFP->getValueAPF().isZero()) + if (N1CFP && N1CFP->isZero()) return N0; // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) @@ -7990,11 +7995,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { // (fsub A, 0) -> A - if (N1CFP && N1CFP->getValueAPF().isZero()) + if (N1CFP && N1CFP->isZero()) return N0; // (fsub 0, B) -> -B - if (N0CFP && N0CFP->getValueAPF().isZero()) { + if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) @@ -8060,7 +8065,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (Options.UnsafeFPMath) { // fold (fmul A, 0) -> 0 - if (N1CFP && N1CFP->getValueAPF().isZero()) + if (N1CFP && N1CFP->isZero()) return N1; // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) @@ -8776,7 +8781,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { } // (fneg (fmul c, x)) -> (fmul -c, x) - if (N0.getOpcode() == ISD::FMUL) { + if (N0.getOpcode() == ISD::FMUL && + (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) { ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); if (CFP1) { APFloat CVal = CFP1->getValueAPF(); @@ -9061,14 +9067,18 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI) { EVT VT; + unsigned AS; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); } else return false; @@ -9092,7 +9102,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, } else return false; - return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); + return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()), AS); } /// Try turning a load/store into a pre-indexed load/store when the base @@ -11908,9 +11918,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (Op.getOpcode() == ISD::UNDEF) continue; // See if we can combine this build_vector into a blend with a zero vector. 
- if (!VecIn2.getNode() && (isNullConstant(Op) || - (Op.getOpcode() == ISD::ConstantFP && - cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { + if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) { UsesZeroVector = true; continue; } @@ -12988,7 +12996,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || N->getOpcode() == ISD::FDIV) { if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP && - cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero())) + cast<ConstantFPSDNode>(RHSOp.getNode())->isZero())) break; } @@ -13252,7 +13260,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Check to see if we can simplify the select into an fabs node if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { // Allow either -0.0 or 0.0 - if (CFP->getValueAPF().isZero()) { + if (CFP->isZero()) { // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs if ((CC == ISD::SETGE || CC == ISD::SETGT) && N0 == N2 && N3.getOpcode() == ISD::FNEG && diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 7b5b8c4eabcf..f3d75cb32a7d 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -80,33 +80,6 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) { return ExtendKind; } -namespace { -struct WinEHNumbering { - WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo), - CurrentBaseState(-1), NextState(0) {} - - WinEHFuncInfo &FuncInfo; - int CurrentBaseState; - int NextState; - - SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack; - SmallPtrSet<const Function *, 4> VisitedHandlers; - - int currentEHNumber() const { - return HandlerStack.empty() ? CurrentBaseState : HandlerStack.back()->getEHState(); - } - - void createUnwindMapEntry(int ToState, ActionHandler *AH); - void createTryBlockMapEntry(int TryLow, int TryHigh, - ArrayRef<CatchHandler *> Handlers); - void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions, - ImmutableCallSite CS); - void popUnmatchedActions(int FirstMismatch); - void calculateStateNumbers(const Function &F); - void findActionRootLPads(const Function &F); -}; -} - void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, SelectionDAG *DAG) { Fn = &fn; @@ -291,31 +264,18 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (!isMSVCEHPersonality(Personality)) return; - WinEHFuncInfo *EHInfo = nullptr; if (Personality == EHPersonality::MSVC_Win64SEH) { addSEHHandlersForLPads(LPads); } else if (Personality == EHPersonality::MSVC_CXX) { const Function *WinEHParentFn = MMI.getWinEHParent(&fn); - EHInfo = &MMI.getWinEHFuncInfo(WinEHParentFn); - if (EHInfo->LandingPadStateMap.empty()) { - WinEHNumbering Num(*EHInfo); - Num.findActionRootLPads(*WinEHParentFn); - // The VisitedHandlers list is used by both findActionRootLPads and - // calculateStateNumbers, but both functions need to visit all handlers. - Num.VisitedHandlers.clear(); - Num.calculateStateNumbers(*WinEHParentFn); - // Pop everything on the handler stack. - // It may be necessary to call this more than once because a handler can - // be pushed on the stack as a result of clearing the stack. 
- while (!Num.HandlerStack.empty()) - Num.processCallSite(None, ImmutableCallSite()); - } + WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(WinEHParentFn); + calculateWinCXXEHStateNumbers(WinEHParentFn, EHInfo); // Copy the state numbers to LandingPadInfo for the current function, which // could be a handler or the parent. for (const LandingPadInst *LP : LPads) { MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; - MMI.addWinEHState(LPadMBB, EHInfo->LandingPadStateMap[LP]); + MMI.addWinEHState(LPadMBB, EHInfo.LandingPadStateMap[LP]); } } } @@ -358,334 +318,6 @@ void FunctionLoweringInfo::addSEHHandlersForLPads( } } -void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) { - WinEHUnwindMapEntry UME; - UME.ToState = ToState; - if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH)) - UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc()); - else - UME.Cleanup = nullptr; - FuncInfo.UnwindMap.push_back(UME); -} - -void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh, - ArrayRef<CatchHandler *> Handlers) { - // See if we already have an entry for this set of handlers. - // This is using iterators rather than a range-based for loop because - // if we find the entry we're looking for we'll need the iterator to erase it. - int NumHandlers = Handlers.size(); - auto I = FuncInfo.TryBlockMap.begin(); - auto E = FuncInfo.TryBlockMap.end(); - for ( ; I != E; ++I) { - auto &Entry = *I; - if (Entry.HandlerArray.size() != (size_t)NumHandlers) - continue; - int N; - for (N = 0; N < NumHandlers; ++N) { - if (Entry.HandlerArray[N].Handler != Handlers[N]->getHandlerBlockOrFunc()) - break; // breaks out of inner loop - } - // If all the handlers match, this is what we were looking for. - if (N == NumHandlers) { - break; - } - } - - // If we found an existing entry for this set of handlers, extend the range - // but move the entry to the end of the map vector. The order of entries - // in the map is critical to the way that the runtime finds handlers. - // FIXME: Depending on what has happened with block ordering, this may - // incorrectly combine entries that should remain separate. - if (I != E) { - // Copy the existing entry. - WinEHTryBlockMapEntry Entry = *I; - Entry.TryLow = std::min(TryLow, Entry.TryLow); - Entry.TryHigh = std::max(TryHigh, Entry.TryHigh); - assert(Entry.TryLow <= Entry.TryHigh); - // Erase the old entry and add this one to the back. - FuncInfo.TryBlockMap.erase(I); - FuncInfo.TryBlockMap.push_back(Entry); - return; - } - - // If we didn't find an entry, create a new one. - WinEHTryBlockMapEntry TBME; - TBME.TryLow = TryLow; - TBME.TryHigh = TryHigh; - assert(TBME.TryLow <= TBME.TryHigh); - for (CatchHandler *CH : Handlers) { - WinEHHandlerType HT; - if (CH->getSelector()->isNullValue()) { - HT.Adjectives = 0x40; - HT.TypeDescriptor = nullptr; - } else { - auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts()); - // Selectors are always pointers to GlobalVariables with 'struct' type. - // The struct has two fields, adjectives and a type descriptor. 
- auto *CS = cast<ConstantStruct>(GV->getInitializer()); - HT.Adjectives = - cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue(); - HT.TypeDescriptor = - cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts()); - } - HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc()); - HT.CatchObjRecoverIdx = CH->getExceptionVarIndex(); - TBME.HandlerArray.push_back(HT); - } - FuncInfo.TryBlockMap.push_back(TBME); -} - -static void print_name(const Value *V) { -#ifndef NDEBUG - if (!V) { - DEBUG(dbgs() << "null"); - return; - } - - if (const auto *F = dyn_cast<Function>(V)) - DEBUG(dbgs() << F->getName()); - else - DEBUG(V->dump()); -#endif -} - -void WinEHNumbering::processCallSite( - MutableArrayRef<std::unique_ptr<ActionHandler>> Actions, - ImmutableCallSite CS) { - DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber() - << ") for: "); - print_name(CS ? CS.getCalledValue() : nullptr); - DEBUG(dbgs() << '\n'); - - DEBUG(dbgs() << "HandlerStack: \n"); - for (int I = 0, E = HandlerStack.size(); I < E; ++I) { - DEBUG(dbgs() << " "); - print_name(HandlerStack[I]->getHandlerBlockOrFunc()); - DEBUG(dbgs() << '\n'); - } - DEBUG(dbgs() << "Actions: \n"); - for (int I = 0, E = Actions.size(); I < E; ++I) { - DEBUG(dbgs() << " "); - print_name(Actions[I]->getHandlerBlockOrFunc()); - DEBUG(dbgs() << '\n'); - } - int FirstMismatch = 0; - for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E; - ++FirstMismatch) { - if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() != - Actions[FirstMismatch]->getHandlerBlockOrFunc()) - break; - } - - // Remove unmatched actions from the stack and process their EH states. - popUnmatchedActions(FirstMismatch); - - DEBUG(dbgs() << "Pushing actions for CallSite: "); - print_name(CS ? CS.getCalledValue() : nullptr); - DEBUG(dbgs() << '\n'); - - bool LastActionWasCatch = false; - const LandingPadInst *LastRootLPad = nullptr; - for (size_t I = FirstMismatch; I != Actions.size(); ++I) { - // We can reuse eh states when pushing two catches for the same invoke. - bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get()); - auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc()); - // Various conditions can lead to a handler being popped from the - // stack and re-pushed later. That shouldn't create a new state. - // FIXME: Can code optimization lead to re-used handlers? - if (FuncInfo.HandlerEnclosedState.count(Handler)) { - // If we already assigned the state enclosed by this handler re-use it. - Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]); - continue; - } - const LandingPadInst* RootLPad = FuncInfo.RootLPad[Handler]; - if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) { - DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber() << "\n"); - Actions[I]->setEHState(currentEHNumber()); - } else { - DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", "); - print_name(Actions[I]->getHandlerBlockOrFunc()); - DEBUG(dbgs() << ") with EH state " << NextState << "\n"); - createUnwindMapEntry(currentEHNumber(), Actions[I].get()); - DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n"); - Actions[I]->setEHState(NextState); - NextState++; - } - HandlerStack.push_back(std::move(Actions[I])); - LastActionWasCatch = CurrActionIsCatch; - LastRootLPad = RootLPad; - } - - // This is used to defer numbering states for a handler until after the - // last time it appears in an invoke action list. 
- if (CS.isInvoke()) { - for (int I = 0, E = HandlerStack.size(); I < E; ++I) { - auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc()); - if (FuncInfo.LastInvoke[Handler] != cast<InvokeInst>(CS.getInstruction())) - continue; - FuncInfo.LastInvokeVisited[Handler] = true; - DEBUG(dbgs() << "Last invoke of "); - print_name(Handler); - DEBUG(dbgs() << " has been visited.\n"); - } - } - - DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: "); - print_name(CS ? CS.getCalledValue() : nullptr); - DEBUG(dbgs() << '\n'); -} - -void WinEHNumbering::popUnmatchedActions(int FirstMismatch) { - // Don't recurse while we are looping over the handler stack. Instead, defer - // the numbering of the catch handlers until we are done popping. - SmallVector<CatchHandler *, 4> PoppedCatches; - for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) { - std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val(); - if (isa<CatchHandler>(Handler.get())) - PoppedCatches.push_back(cast<CatchHandler>(Handler.release())); - } - - int TryHigh = NextState - 1; - int LastTryLowIdx = 0; - for (int I = 0, E = PoppedCatches.size(); I != E; ++I) { - CatchHandler *CH = PoppedCatches[I]; - DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n"); - if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) { - int TryLow = CH->getEHState(); - auto Handlers = - makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1); - DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh); - for (size_t J = 0; J < Handlers.size(); ++J) { - DEBUG(dbgs() << ", "); - print_name(Handlers[J]->getHandlerBlockOrFunc()); - } - DEBUG(dbgs() << ")\n"); - createTryBlockMapEntry(TryLow, TryHigh, Handlers); - LastTryLowIdx = I + 1; - } - } - - for (CatchHandler *CH : PoppedCatches) { - if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) { - if (FuncInfo.LastInvokeVisited[F]) { - DEBUG(dbgs() << "Assigning base state " << NextState << " to "); - print_name(F); - DEBUG(dbgs() << '\n'); - FuncInfo.HandlerBaseState[F] = NextState; - DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() - << ", null)\n"); - createUnwindMapEntry(currentEHNumber(), nullptr); - ++NextState; - calculateStateNumbers(*F); - } - else { - DEBUG(dbgs() << "Deferring handling of "); - print_name(F); - DEBUG(dbgs() << " until last invoke visited.\n"); - } - } - delete CH; - } -} - -void WinEHNumbering::calculateStateNumbers(const Function &F) { - auto I = VisitedHandlers.insert(&F); - if (!I.second) - return; // We've already visited this handler, don't renumber it. 
- - int OldBaseState = CurrentBaseState; - if (FuncInfo.HandlerBaseState.count(&F)) { - CurrentBaseState = FuncInfo.HandlerBaseState[&F]; - } - - size_t SavedHandlerStackSize = HandlerStack.size(); - - DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n'); - SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; - for (const BasicBlock &BB : F) { - for (const Instruction &I : BB) { - const auto *CI = dyn_cast<CallInst>(&I); - if (!CI || CI->doesNotThrow()) - continue; - processCallSite(None, CI); - } - const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); - if (!II) - continue; - const LandingPadInst *LPI = II->getLandingPadInst(); - auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); - if (!ActionsCall) - continue; - assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); - parseEHActions(ActionsCall, ActionList); - if (ActionList.empty()) - continue; - processCallSite(ActionList, II); - ActionList.clear(); - FuncInfo.LandingPadStateMap[LPI] = currentEHNumber(); - DEBUG(dbgs() << "Assigning state " << currentEHNumber() - << " to landing pad at " << LPI->getParent()->getName() - << '\n'); - } - - // Pop any actions that were pushed on the stack for this function. - popUnmatchedActions(SavedHandlerStackSize); - - DEBUG(dbgs() << "Assigning max state " << NextState - 1 - << " to " << F.getName() << '\n'); - FuncInfo.CatchHandlerMaxState[&F] = NextState - 1; - - CurrentBaseState = OldBaseState; -} - -// This function follows the same basic traversal as calculateStateNumbers -// but it is necessary to identify the root landing pad associated -// with each action before we start assigning state numbers. -void WinEHNumbering::findActionRootLPads(const Function &F) { - auto I = VisitedHandlers.insert(&F); - if (!I.second) - return; // We've already visited this handler, don't revisit it. - - SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; - for (const BasicBlock &BB : F) { - const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); - if (!II) - continue; - const LandingPadInst *LPI = II->getLandingPadInst(); - auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); - if (!ActionsCall) - continue; - - assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); - parseEHActions(ActionsCall, ActionList); - if (ActionList.empty()) - continue; - for (int I = 0, E = ActionList.size(); I < E; ++I) { - if (auto *Handler - = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) { - FuncInfo.LastInvoke[Handler] = II; - // Don't replace the root landing pad if we previously saw this - // handler in a different function. - if (FuncInfo.RootLPad.count(Handler) && - FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F) - continue; - DEBUG(dbgs() << "Setting root lpad for "); - print_name(Handler); - DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n'); - FuncInfo.RootLPad[Handler] = LPI; - } - } - // Walk the actions again and look for nested handlers. This has to - // happen after all of the actions have been processed in the current - // function. - for (int I = 0, E = ActionList.size(); I < E; ++I) - if (auto *Handler - = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) - findActionRootLPads(*Handler); - ActionList.clear(); - } -} - /// clear - Clear out all the function-specific state. This returns this /// FunctionLoweringInfo to an empty state, ready to be used for a /// different function. 
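As context for the MIRParserImpl::parse flow added earlier in this patch: the parser expects an optional leading YAML block-scalar document carrying the serialized LLVM IR module, followed by one YAML document per machine function. A hypothetical input of that shape might look like the sketch below (at this revision only the machine function's name field is actually consumed; the IR body shown is illustrative):

--- |
  ; LLVM IR module, embedded as a YAML block scalar
  define void @foo() {
    ret void
  }
...
---
name: foo
...

The MIRPrinter added in this patch emits the same layout in the other direction: BlockScalarTraits<Module> prints the IR module as the block scalar, and each machine function is written as its own YAML document.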
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index eeaebf780cc3..96e2ff89013a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -602,10 +602,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { - SDValue Res = GetPromotedInteger(N->getOperand(0)); - SDValue Amt = N->getOperand(1); - Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; - return DAG.getNode(ISD::SHL, SDLoc(N), Res.getValueType(), Res, Amt); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) + LHS = GetPromotedInteger(LHS); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); + return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { @@ -625,19 +628,25 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); // The input value must be properly sign extended. - SDValue Res = SExtPromotedInteger(N->getOperand(0)); - SDValue Amt = N->getOperand(1); - Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; - return DAG.getNode(ISD::SRA, SDLoc(N), Res.getValueType(), Res, Amt); + if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) + LHS = SExtPromotedInteger(LHS); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); + return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); // The input value must be properly zero extended. - SDValue Res = ZExtPromotedInteger(N->getOperand(0)); - SDValue Amt = N->getOperand(1); - Amt = Amt.getValueType().isVector() ? 
ZExtPromotedInteger(Amt) : Amt; - return DAG.getNode(ISD::SRL, SDLoc(N), Res.getValueType(), Res, Amt); + if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) + LHS = ZExtPromotedInteger(LHS); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); + return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 3853ada5fb0b..f4c7b5934829 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -71,7 +71,7 @@ SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { if (!SUnits.empty()) Addr = &SUnits[0]; #endif - SUnits.push_back(SUnit(N, (unsigned)SUnits.size())); + SUnits.emplace_back(N, (unsigned)SUnits.size()); assert((Addr == nullptr || Addr == &SUnits[0]) && "SUnits std::vector reallocated on the fly!"); SUnits.back().OrigNode = &SUnits.back(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index efd4bd9a4d89..cf51e756d847 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1810,6 +1810,13 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { return SDValue(N, 0); } +SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) { + if (VT == V.getValueType()) + return V; + + return getNode(ISD::BITCAST, SDLoc(V), VT, V); +} + /// getAddrSpaceCast - Return an AddrSpaceCastSDNode. SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS) { @@ -2425,6 +2432,19 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne = KnownOne.trunc(BitWidth); break; } + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: { + APInt Op0Zero, Op0One; + APInt Op1Zero, Op1One; + computeKnownBits(Op.getOperand(0), Op0Zero, Op0One, Depth); + computeKnownBits(Op.getOperand(1), Op1Zero, Op1One, Depth); + + KnownZero = Op0Zero & Op1Zero; + KnownOne = Op0One & Op1One; + break; + } case ISD::FrameIndex: case ISD::TargetFrameIndex: if (unsigned Align = InferPtrAlignment(Op)) { @@ -2528,7 +2548,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (Tmp == 1) return 1; // Early out. Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); return std::min(Tmp, Tmp2); - + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (Tmp == 1) + return 1; // Early out. 
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + return std::min(Tmp, Tmp2); case ISD::SADDO: case ISD::UADDO: case ISD::SSUBO: @@ -2903,7 +2931,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::FP_TO_UINT: case ISD::TRUNCATE: case ISD::UINT_TO_FP: - case ISD::SINT_TO_FP: { + case ISD::SINT_TO_FP: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: { EVT SVT = VT.getScalarType(); EVT InVT = BV->getValueType(0); EVT InSVT = InVT.getScalarType(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 85303d27dcf8..8ba957d62870 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1002,7 +1002,16 @@ bool SelectionDAGBuilder::findValue(const Value *V) const { SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { // If we already have an SDValue for this value, use it. SDValue &N = NodeMap[V]; - if (N.getNode()) return N; + if (N.getNode()) { + if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) { + // Remove the debug location from the node as the node is about to be used + // in a location which may differ from the original debug location. This + // is relevant to Constant and ConstantFP nodes because they can appear + // as constant expressions inside PHI nodes. + N->setDebugLoc(DebugLoc()); + } + return N; + } // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); @@ -2282,7 +2291,11 @@ void SelectionDAGBuilder::visitSelect(const User &I) { while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) VT = TLI.getTypeToTransformTo(Ctx, VT); - if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT)) { + if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) && + // If the underlying comparison instruction is used by any other instruction, + // the consumed instructions won't be destroyed, so it is not profitable + // to convert to a min/max. + cast<SelectInst>(&I)->getCondition()->hasOneUse()) { OpCode = Opc; LHSVal = getValue(LHS); RHSVal = getValue(RHS); @@ -2848,7 +2861,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; + + // The IR notion of invariant_load only guarantees that all *non-faulting* + // invariant loads result in the same value. The MI notion of invariant load + // guarantees that the load can be legally moved to any location within its + // containing function. The MI notion of invariant_load is stronger than the + // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load + // with a guarantee that the location being loaded from is dereferenceable + // throughout the function's lifetime. 
+ + bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && + isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; @@ -7437,7 +7460,7 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, JumpTableHeader JTH(Clusters[First].Low->getValue(), Clusters[Last].High->getValue(), SI->getCondition(), nullptr, false); - JTCases.push_back(JumpTableBlock(JTH, JT)); + JTCases.emplace_back(std::move(JTH), std::move(JT)); JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, JTCases.size() - 1, Weight); @@ -7600,7 +7623,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, const int BitWidth = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); - assert((High - Low + 1).sle(BitWidth) && "Case range must fit in bit mask!"); + assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); if (Low.isNonNegative() && High.slt(BitWidth)) { // Optimize the case where all the case values fit in a @@ -7628,10 +7651,9 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, // Update Mask, Bits and ExtraWeight. uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); - for (uint64_t j = Lo; j <= Hi; ++j) { - CB->Mask |= 1ULL << j; - CB->Bits++; - } + assert(Hi >= Lo && Hi < 64 && "Invalid bit case!"); + CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo; + CB->Bits += Hi - Lo + 1; CB->ExtraWeight += Clusters[i].Weight; TotalWeight += Clusters[i].Weight; assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!"); @@ -7650,9 +7672,9 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); } - BitTestCases.push_back(BitTestBlock(LowBound, CmpRange, SI->getCondition(), - -1U, MVT::Other, false, nullptr, - nullptr, std::move(BTI))); + BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), + SI->getCondition(), -1U, MVT::Other, false, nullptr, + nullptr, std::move(BTI)); BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, BitTestCases.size() - 1, TotalWeight); @@ -7746,8 +7768,10 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) { Clusters[DstIndex++] = BitTestCluster; } else { - for (unsigned I = First; I <= Last; ++I) - std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); + size_t NumClusters = Last - First + 1; + std::memmove(&Clusters[DstIndex], &Clusters[First], + sizeof(Clusters[0]) * NumClusters); + DstIndex += NumClusters; } } Clusters.resize(DstIndex); @@ -7783,22 +7807,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, const APInt &BigValue = Big.Low->getValue(); // Check that there is only one bit different. - if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && - (SmallValue | BigValue) == BigValue) { - // Isolate the common bit. 
- APInt CommonBit = BigValue & ~SmallValue; - assert((SmallValue | CommonBit) == BigValue && - CommonBit.countPopulation() == 1 && "Not a common bit?"); - + APInt CommonBit = BigValue ^ SmallValue; + if (CommonBit.isPowerOf2()) { SDValue CondLHS = getValue(Cond); EVT VT = CondLHS.getValueType(); SDLoc DL = getCurSDLoc(); SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, DAG.getConstant(CommonBit, DL, VT)); - SDValue Cond = DAG.getSetCC(DL, MVT::i1, Or, - DAG.getConstant(BigValue, DL, VT), - ISD::SETEQ); + SDValue Cond = DAG.getSetCC( + DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT), + ISD::SETEQ); // Update successor info. // Both Small and Big will jump to Small.BB, so we sum up the weights. diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 2d4ab6c5077a..8bbfa01e7594 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -238,17 +238,6 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, SDValue ActualCallee = Builder.getValue(ISP.getActualCallee()); - // Handle immediate and symbolic callees. - if (auto *ConstCallee = dyn_cast<ConstantSDNode>(ActualCallee.getNode())) - ActualCallee = Builder.DAG.getIntPtrConstant(ConstCallee->getZExtValue(), - Builder.getCurSDLoc(), - /*isTarget=*/true); - else if (auto *SymbolicCallee = - dyn_cast<GlobalAddressSDNode>(ActualCallee.getNode())) - ActualCallee = Builder.DAG.getTargetGlobalAddress( - SymbolicCallee->getGlobal(), SDLoc(SymbolicCallee), - SymbolicCallee->getValueType(0)); - assert(CS.getCallingConv() != CallingConv::AnyReg && "anyregcc is not supported on statepoints!"); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 833da4b6d59c..9daf2a50ad8f 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -254,7 +254,7 @@ const MCExpr * TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,MCContext &Ctx) const{ // The normal PIC reloc base is the label at the start of the jump table. - return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx); + return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx); } bool diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index ffe59c19d3e0..1e8e03f9a7df 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -315,9 +315,9 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, // Create an expression to calculate the offset of the callsite from function // entry. - const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub( - MCSymbolRefExpr::Create(MILabel, OutContext), - MCSymbolRefExpr::Create(AP.CurrentFnSymForSize, OutContext), + const MCExpr *CSOffsetExpr = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(MILabel, OutContext), + MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext); CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations), diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 92488defc793..c809087d3da4 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -471,11 +471,13 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, MI->getOpcode() == TargetOpcode::PATCHPOINT) { // Fold stackmap/patchpoint. 
NewMI = foldPatchpoint(MF, MI, Ops, FI, *this); + if (NewMI) + MBB->insert(MI, NewMI); } else { // Ask the target to do the actual folding. - NewMI =foldMemoryOperandImpl(MF, MI, Ops, FI); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI); } - + if (NewMI) { NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); // Add a memory operand, foldMemoryOperandImpl doesn't do that. @@ -493,8 +495,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); - // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI. - return MBB->insert(MI, NewMI); + return NewMI; } // Straight COPY may fold as load/store. @@ -539,15 +540,15 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, isLoadFromStackSlot(LoadMI, FrameIndex)) { // Fold stackmap/patchpoint. NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); + if (NewMI) + NewMI = MBB.insert(MI, NewMI); } else { // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI); } if (!NewMI) return nullptr; - NewMI = MBB.insert(MI, NewMI); - // Copy the memoperands from the load to the folded instruction. if (MI->memoperands_empty()) { NewMI->setMemRefs(LoadMI->memoperands_begin(), diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index b7f1db6529f7..1bc89aa2271d 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -1632,7 +1632,8 @@ TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // The default implementation of this implements a conservative RISCy, r+r and // r+i addr mode. 
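The conservative default body that follows the truncated comment above accepts only the simple r, r+i, r+r and 2*r forms. A from-memory sketch of that logic (the helper name is hypothetical, and the real body also bounds BaseOffs to a sign-extended 16-bit field):
#include "llvm/Target/TargetLowering.h"
// Sketch: what "conservative RISCy, r+r and r+i" means in practice.
static bool conservativeAddrModeSketch(const llvm::TargetLoweringBase::AddrMode &AM) {
  if (AM.BaseGV)
    return false;                            // never a global as the base
  switch (AM.Scale) {
  case 0:                                    // r, i, or r+i
    return true;
  case 1:                                    // r+r is fine, r+r+i is not
    return !(AM.HasBaseReg && AM.BaseOffs);
  case 2:                                    // bare 2*r folds to r+r
    return !AM.HasBaseReg && !AM.BaseOffs;
  default:                                   // no general n*r
    return false;
  }
}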
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index a32bdf8955ce..d7b043dac013 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -30,7 +30,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" @@ -63,7 +63,8 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, const MCSymbol *Sym) const { SmallString<64> NameData("DW.ref."); NameData += Sym->getName(); - MCSymbol *Label = getContext().getOrCreateSymbol(NameData); + MCSymbolELF *Label = + cast<MCSymbolELF>(getContext().getOrCreateSymbol(NameData)); Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); Streamer.EmitSymbolAttribute(Label, MCSA_Weak); StringRef Prefix = ".data."; @@ -75,8 +76,8 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Streamer.SwitchSection(Sec); Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); - const MCExpr *E = MCConstantExpr::Create(Size, getContext()); - Streamer.EmitELFSize(Label, E); + const MCExpr *E = MCConstantExpr::create(Size, getContext()); + Streamer.emitELFSize(Label, E); Streamer.EmitLabel(Label); Streamer.EmitSymbolValue(Sym, Size); @@ -101,7 +102,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( } return TargetLoweringObjectFile:: - getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()), + getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()), Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } @@ -684,7 +685,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference( } return TargetLoweringObjectFile:: - getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()), + getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()), Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } @@ -760,16 +761,16 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */); const MCExpr *BSymExpr = - MCSymbolRefExpr::Create(BaseSym, MCSymbolRefExpr::VK_None, Ctx); + MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx); const MCExpr *LHS = - MCSymbolRefExpr::Create(Stub, MCSymbolRefExpr::VK_None, Ctx); + MCSymbolRefExpr::create(Stub, MCSymbolRefExpr::VK_None, Ctx); if (!Offset) - return MCBinaryExpr::CreateSub(LHS, BSymExpr, Ctx); + return MCBinaryExpr::createSub(LHS, BSymExpr, Ctx); const MCExpr *RHS = - MCBinaryExpr::CreateAdd(BSymExpr, MCConstantExpr::Create(Offset, Ctx), Ctx); - return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + MCBinaryExpr::createAdd(BSymExpr, MCConstantExpr::create(Offset, Ctx), Ctx); + return MCBinaryExpr::createSub(LHS, RHS, Ctx); } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 9fb1b5b65fbb..32d5100f8495 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -417,17 +417,11 @@ void VirtRegRewriter::rewrite() { // Finally, remove any identity copies. 
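// An identity copy is one whose operands rewrote to the same physical
// register, e.g. "%vreg5<def> = COPY %EAX" after %vreg5 was assigned %EAX,
// leaving "%EAX = COPY %EAX"; it transfers no value and can be erased.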
if (MI->isIdentityCopy()) { ++NumIdCopies; - if (MI->getNumOperands() == 2) { - DEBUG(dbgs() << "Deleting identity copy.\n"); - if (Indexes) - Indexes->removeMachineInstrFromMaps(MI); - // It's safe to erase MI because MII has already been incremented. - MI->eraseFromParent(); - } else { - // Transform identity copy to a KILL to deal with subregisters. - MI->setDesc(TII->get(TargetOpcode::KILL)); - DEBUG(dbgs() << "Identity copy: " << *MI); - } + DEBUG(dbgs() << "Deleting identity copy.\n"); + if (Indexes) + Indexes->removeMachineInstrFromMaps(MI); + // It's safe to erase MI because MII has already been incremented. + MI->eraseFromParent(); } } } diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index 7246e1cf3ea5..c2b3d84ca363 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -2480,3 +2480,377 @@ void llvm::parseEHActions( } std::reverse(Actions.begin(), Actions.end()); } + +namespace { +struct WinEHNumbering { + WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo), + CurrentBaseState(-1), NextState(0) {} + + WinEHFuncInfo &FuncInfo; + int CurrentBaseState; + int NextState; + + SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack; + SmallPtrSet<const Function *, 4> VisitedHandlers; + + int currentEHNumber() const { + return HandlerStack.empty() ? CurrentBaseState : HandlerStack.back()->getEHState(); + } + + void createUnwindMapEntry(int ToState, ActionHandler *AH); + void createTryBlockMapEntry(int TryLow, int TryHigh, + ArrayRef<CatchHandler *> Handlers); + void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions, + ImmutableCallSite CS); + void popUnmatchedActions(int FirstMismatch); + void calculateStateNumbers(const Function &F); + void findActionRootLPads(const Function &F); +}; +} + +void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) { + WinEHUnwindMapEntry UME; + UME.ToState = ToState; + if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH)) + UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc()); + else + UME.Cleanup = nullptr; + FuncInfo.UnwindMap.push_back(UME); +} + +void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh, + ArrayRef<CatchHandler *> Handlers) { + // See if we already have an entry for this set of handlers. + // This is using iterators rather than a range-based for loop because + // if we find the entry we're looking for we'll need the iterator to erase it. + int NumHandlers = Handlers.size(); + auto I = FuncInfo.TryBlockMap.begin(); + auto E = FuncInfo.TryBlockMap.end(); + for ( ; I != E; ++I) { + auto &Entry = *I; + if (Entry.HandlerArray.size() != (size_t)NumHandlers) + continue; + int N; + for (N = 0; N < NumHandlers; ++N) { + if (Entry.HandlerArray[N].Handler != Handlers[N]->getHandlerBlockOrFunc()) + break; // breaks out of inner loop + } + // If all the handlers match, this is what we were looking for. + if (N == NumHandlers) { + break; + } + } + + // If we found an existing entry for this set of handlers, extend the range + // but move the entry to the end of the map vector. The order of entries + // in the map is critical to the way that the runtime finds handlers. + // FIXME: Depending on what has happened with block ordering, this may + // incorrectly combine entries that should remain separate. + if (I != E) { + // Copy the existing entry. 
+ WinEHTryBlockMapEntry Entry = *I; + Entry.TryLow = std::min(TryLow, Entry.TryLow); + Entry.TryHigh = std::max(TryHigh, Entry.TryHigh); + assert(Entry.TryLow <= Entry.TryHigh); + // Erase the old entry and add this one to the back. + FuncInfo.TryBlockMap.erase(I); + FuncInfo.TryBlockMap.push_back(Entry); + return; + } + + // If we didn't find an entry, create a new one. + WinEHTryBlockMapEntry TBME; + TBME.TryLow = TryLow; + TBME.TryHigh = TryHigh; + assert(TBME.TryLow <= TBME.TryHigh); + for (CatchHandler *CH : Handlers) { + WinEHHandlerType HT; + if (CH->getSelector()->isNullValue()) { + HT.Adjectives = 0x40; + HT.TypeDescriptor = nullptr; + } else { + auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts()); + // Selectors are always pointers to GlobalVariables with 'struct' type. + // The struct has two fields, adjectives and a type descriptor. + auto *CS = cast<ConstantStruct>(GV->getInitializer()); + HT.Adjectives = + cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue(); + HT.TypeDescriptor = + cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts()); + } + HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc()); + HT.CatchObjRecoverIdx = CH->getExceptionVarIndex(); + TBME.HandlerArray.push_back(HT); + } + FuncInfo.TryBlockMap.push_back(TBME); +} + +static void print_name(const Value *V) { +#ifndef NDEBUG + if (!V) { + DEBUG(dbgs() << "null"); + return; + } + + if (const auto *F = dyn_cast<Function>(V)) + DEBUG(dbgs() << F->getName()); + else + DEBUG(V->dump()); +#endif +} + +void WinEHNumbering::processCallSite( + MutableArrayRef<std::unique_ptr<ActionHandler>> Actions, + ImmutableCallSite CS) { + DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber() + << ") for: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); + + DEBUG(dbgs() << "HandlerStack: \n"); + for (int I = 0, E = HandlerStack.size(); I < E; ++I) { + DEBUG(dbgs() << " "); + print_name(HandlerStack[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << '\n'); + } + DEBUG(dbgs() << "Actions: \n"); + for (int I = 0, E = Actions.size(); I < E; ++I) { + DEBUG(dbgs() << " "); + print_name(Actions[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << '\n'); + } + int FirstMismatch = 0; + for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E; + ++FirstMismatch) { + if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() != + Actions[FirstMismatch]->getHandlerBlockOrFunc()) + break; + } + + // Remove unmatched actions from the stack and process their EH states. + popUnmatchedActions(FirstMismatch); + + DEBUG(dbgs() << "Pushing actions for CallSite: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); + + bool LastActionWasCatch = false; + const LandingPadInst *LastRootLPad = nullptr; + for (size_t I = FirstMismatch; I != Actions.size(); ++I) { + // We can reuse eh states when pushing two catches for the same invoke. + bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get()); + auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc()); + // Various conditions can lead to a handler being popped from the + // stack and re-pushed later. That shouldn't create a new state. + // FIXME: Can code optimization lead to re-used handlers? + if (FuncInfo.HandlerEnclosedState.count(Handler)) { + // If we already assigned the state enclosed by this handler re-use it. 
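// (HandlerEnclosedState caches that assignment. Handlers seen for the first
// time are numbered in push order by the loop below: each new state's
// unwind-map entry names the state that was current beneath it, so the
// runtime walks the map from innermost to outermost handler.)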
+ Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]); + continue; + } + const LandingPadInst* RootLPad = FuncInfo.RootLPad[Handler]; + if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) { + DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber() << "\n"); + Actions[I]->setEHState(currentEHNumber()); + } else { + DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", "); + print_name(Actions[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << ") with EH state " << NextState << "\n"); + createUnwindMapEntry(currentEHNumber(), Actions[I].get()); + DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n"); + Actions[I]->setEHState(NextState); + NextState++; + } + HandlerStack.push_back(std::move(Actions[I])); + LastActionWasCatch = CurrActionIsCatch; + LastRootLPad = RootLPad; + } + + // This is used to defer numbering states for a handler until after the + // last time it appears in an invoke action list. + if (CS.isInvoke()) { + for (int I = 0, E = HandlerStack.size(); I < E; ++I) { + auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc()); + if (FuncInfo.LastInvoke[Handler] != cast<InvokeInst>(CS.getInstruction())) + continue; + FuncInfo.LastInvokeVisited[Handler] = true; + DEBUG(dbgs() << "Last invoke of "); + print_name(Handler); + DEBUG(dbgs() << " has been visited.\n"); + } + } + + DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); +} + +void WinEHNumbering::popUnmatchedActions(int FirstMismatch) { + // Don't recurse while we are looping over the handler stack. Instead, defer + // the numbering of the catch handlers until we are done popping. + SmallVector<CatchHandler *, 4> PoppedCatches; + for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) { + std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val(); + if (isa<CatchHandler>(Handler.get())) + PoppedCatches.push_back(cast<CatchHandler>(Handler.release())); + } + + int TryHigh = NextState - 1; + int LastTryLowIdx = 0; + for (int I = 0, E = PoppedCatches.size(); I != E; ++I) { + CatchHandler *CH = PoppedCatches[I]; + DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n"); + if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) { + int TryLow = CH->getEHState(); + auto Handlers = + makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1); + DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh); + for (size_t J = 0; J < Handlers.size(); ++J) { + DEBUG(dbgs() << ", "); + print_name(Handlers[J]->getHandlerBlockOrFunc()); + } + DEBUG(dbgs() << ")\n"); + createTryBlockMapEntry(TryLow, TryHigh, Handlers); + LastTryLowIdx = I + 1; + } + } + + for (CatchHandler *CH : PoppedCatches) { + if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) { + if (FuncInfo.LastInvokeVisited[F]) { + DEBUG(dbgs() << "Assigning base state " << NextState << " to "); + print_name(F); + DEBUG(dbgs() << '\n'); + FuncInfo.HandlerBaseState[F] = NextState; + DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() + << ", null)\n"); + createUnwindMapEntry(currentEHNumber(), nullptr); + ++NextState; + calculateStateNumbers(*F); + } + else { + DEBUG(dbgs() << "Deferring handling of "); + print_name(F); + DEBUG(dbgs() << " until last invoke visited.\n"); + } + } + delete CH; + } +} + +void WinEHNumbering::calculateStateNumbers(const Function &F) { + auto I = VisitedHandlers.insert(&F); + if (!I.second) 
+ return; // We've already visited this handler, don't renumber it. + + int OldBaseState = CurrentBaseState; + if (FuncInfo.HandlerBaseState.count(&F)) { + CurrentBaseState = FuncInfo.HandlerBaseState[&F]; + } + + size_t SavedHandlerStackSize = HandlerStack.size(); + + DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n'); + SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) { + const auto *CI = dyn_cast<CallInst>(&I); + if (!CI || CI->doesNotThrow()) + continue; + processCallSite(None, CI); + } + const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + const LandingPadInst *LPI = II->getLandingPadInst(); + auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); + if (!ActionsCall) + continue; + assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); + parseEHActions(ActionsCall, ActionList); + if (ActionList.empty()) + continue; + processCallSite(ActionList, II); + ActionList.clear(); + FuncInfo.LandingPadStateMap[LPI] = currentEHNumber(); + DEBUG(dbgs() << "Assigning state " << currentEHNumber() + << " to landing pad at " << LPI->getParent()->getName() + << '\n'); + } + + // Pop any actions that were pushed on the stack for this function. + popUnmatchedActions(SavedHandlerStackSize); + + DEBUG(dbgs() << "Assigning max state " << NextState - 1 + << " to " << F.getName() << '\n'); + FuncInfo.CatchHandlerMaxState[&F] = NextState - 1; + + CurrentBaseState = OldBaseState; +} + +// This function follows the same basic traversal as calculateStateNumbers +// but it is necessary to identify the root landing pad associated +// with each action before we start assigning state numbers. +void WinEHNumbering::findActionRootLPads(const Function &F) { + auto I = VisitedHandlers.insert(&F); + if (!I.second) + return; // We've already visited this handler, don't revisit it. + + SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; + for (const BasicBlock &BB : F) { + const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + const LandingPadInst *LPI = II->getLandingPadInst(); + auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); + if (!ActionsCall) + continue; + + assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); + parseEHActions(ActionsCall, ActionList); + if (ActionList.empty()) + continue; + for (int I = 0, E = ActionList.size(); I < E; ++I) { + if (auto *Handler + = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) { + FuncInfo.LastInvoke[Handler] = II; + // Don't replace the root landing pad if we previously saw this + // handler in a different function. + if (FuncInfo.RootLPad.count(Handler) && + FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F) + continue; + DEBUG(dbgs() << "Setting root lpad for "); + print_name(Handler); + DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n'); + FuncInfo.RootLPad[Handler] = LPI; + } + } + // Walk the actions again and look for nested handlers. This has to + // happen after all of the actions have been processed in the current + // function. + for (int I = 0, E = ActionList.size(); I < E; ++I) + if (auto *Handler + = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) + findActionRootLPads(*Handler); + ActionList.clear(); + } +} + +void llvm::calculateWinCXXEHStateNumbers(const Function *ParentFn, + WinEHFuncInfo &FuncInfo) { + // Return if it's already been done. 
+ if (!FuncInfo.LandingPadStateMap.empty()) + return; + + WinEHNumbering Num(FuncInfo); + Num.findActionRootLPads(*ParentFn); + // The VisitedHandlers list is used by both findActionRootLPads and + // calculateStateNumbers, but both functions need to visit all handlers. + Num.VisitedHandlers.clear(); + Num.calculateStateNumbers(*ParentFn); + // Pop everything on the handler stack. + // It may be necessary to call this more than once because a handler can + // be pushed on the stack as a result of clearing the stack. + while (!Num.HandlerStack.empty()) + Num.processCallSite(None, ImmutableCallSite()); +} diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index cc2fbbdc8ca7..baab3873b915 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -352,7 +352,7 @@ void DWARFContext::parseTypeUnits() { if (!TUs.empty()) return; for (const auto &I : getTypesSections()) { - TUs.push_back(DWARFUnitSection<DWARFTypeUnit>()); + TUs.emplace_back(); TUs.back().parse(*this, I.second); } } @@ -365,7 +365,7 @@ void DWARFContext::parseDWOTypeUnits() { if (!DWOTUs.empty()) return; for (const auto &I : getTypesDWOSections()) { - DWOTUs.push_back(DWARFUnitSection<DWARFTypeUnit>()); + DWOTUs.emplace_back(); DWOTUs.back().parseDWO(*this, I.second); } } diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index b63af6a64ee4..a0bee0da1765 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -25,6 +25,7 @@ void DWARFDebugLine::Prologue::clear() { TotalLength = Version = PrologueLength = 0; MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0; OpcodeBase = 0; + IsDWARF64 = false; StandardOpcodeLengths.clear(); IncludeDirectories.clear(); FileNames.clear(); @@ -32,9 +33,9 @@ void DWARFDebugLine::Prologue::clear() { void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { OS << "Line table prologue:\n" - << format(" total_length: 0x%8.8x\n", TotalLength) + << format(" total_length: 0x%8.8" PRIx64 "\n", TotalLength) << format(" version: %u\n", Version) - << format(" prologue_length: 0x%8.8x\n", PrologueLength) + << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength) << format(" min_inst_length: %u\n", MinInstLength) << format(Version >= 4 ? 
"max_ops_per_inst: %u\n" : "", MaxOpsPerInst) << format(" default_is_stmt: %u\n", DefaultIsStmt) @@ -67,16 +68,23 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { bool DWARFDebugLine::Prologue::parse(DataExtractor debug_line_data, uint32_t *offset_ptr) { - const uint32_t prologue_offset = *offset_ptr; + const uint64_t prologue_offset = *offset_ptr; clear(); TotalLength = debug_line_data.getU32(offset_ptr); + if (TotalLength == UINT32_MAX) { + IsDWARF64 = true; + TotalLength = debug_line_data.getU64(offset_ptr); + } else if (TotalLength > 0xffffff00) { + return false; + } Version = debug_line_data.getU16(offset_ptr); if (Version < 2) return false; - PrologueLength = debug_line_data.getU32(offset_ptr); - const uint32_t end_prologue_offset = PrologueLength + *offset_ptr; + PrologueLength = debug_line_data.getUnsigned(offset_ptr, + sizeofPrologueLength()); + const uint64_t end_prologue_offset = PrologueLength + *offset_ptr; MinInstLength = debug_line_data.getU8(offset_ptr); if (Version >= 4) MaxOpsPerInst = debug_line_data.getU8(offset_ptr); @@ -114,9 +122,10 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor debug_line_data, } if (*offset_ptr != end_prologue_offset) { - fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should" - " have ended at 0x%8.8x but it ended at 0x%8.8x\n", - prologue_offset, end_prologue_offset, *offset_ptr); + fprintf(stderr, "warning: parsing line table prologue at 0x%8.8" PRIx64 + " should have ended at 0x%8.8" PRIx64 + " but it ended at 0x%8.8" PRIx64 "\n", + prologue_offset, end_prologue_offset, (uint64_t)*offset_ptr); return false; } return true; @@ -258,7 +267,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, } const uint32_t end_offset = debug_line_offset + Prologue.TotalLength + - sizeof(Prologue.TotalLength); + Prologue.sizeofTotalLength(); ParsingState State(this); @@ -522,10 +531,36 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, return end_offset; } +uint32_t +DWARFDebugLine::LineTable::findRowInSeq(const DWARFDebugLine::Sequence &seq, + uint64_t address) const { + if (!seq.containsPC(address)) + return UnknownRowIndex; + // Search for instruction address in the rows describing the sequence. + // Rows are stored in a vector, so we may use arithmetical operations with + // iterators. + DWARFDebugLine::Row row; + row.Address = address; + RowIter first_row = Rows.begin() + seq.FirstRowIndex; + RowIter last_row = Rows.begin() + seq.LastRowIndex; + LineTable::RowIter row_pos = std::lower_bound( + first_row, last_row, row, DWARFDebugLine::Row::orderByAddress); + if (row_pos == last_row) { + return seq.LastRowIndex - 1; + } + uint32_t index = seq.FirstRowIndex + (row_pos - first_row); + if (row_pos->Address > address) { + if (row_pos == first_row) + return UnknownRowIndex; + else + index--; + } + return index; +} + uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { - uint32_t unknown_index = UINT32_MAX; if (Sequences.empty()) - return unknown_index; + return UnknownRowIndex; // First, find an instruction sequence containing the given address. 
DWARFDebugLine::Sequence sequence; sequence.LowPC = address; @@ -540,31 +575,10 @@ uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { found_seq = *seq_pos; } else { if (seq_pos == first_seq) - return unknown_index; + return UnknownRowIndex; found_seq = *(seq_pos - 1); } - if (!found_seq.containsPC(address)) - return unknown_index; - // Search for instruction address in the rows describing the sequence. - // Rows are stored in a vector, so we may use arithmetical operations with - // iterators. - DWARFDebugLine::Row row; - row.Address = address; - RowIter first_row = Rows.begin() + found_seq.FirstRowIndex; - RowIter last_row = Rows.begin() + found_seq.LastRowIndex; - RowIter row_pos = std::lower_bound(first_row, last_row, row, - DWARFDebugLine::Row::orderByAddress); - if (row_pos == last_row) { - return found_seq.LastRowIndex - 1; - } - uint32_t index = found_seq.FirstRowIndex + (row_pos - first_row); - if (row_pos->Address > address) { - if (row_pos == first_row) - return unknown_index; - else - index--; - } - return index; + return findRowInSeq(found_seq, address); } bool DWARFDebugLine::LineTable::lookupAddressRange( @@ -593,45 +607,21 @@ bool DWARFDebugLine::LineTable::lookupAddressRange( // index we just calculated while (seq_pos != last_seq && seq_pos->LowPC < end_addr) { - DWARFDebugLine::Sequence cur_seq = *seq_pos; - uint32_t first_row_index; - uint32_t last_row_index; - if (seq_pos == start_pos) { - // For the first sequence, we need to find which row in the sequence is the - // first in our range. Rows are stored in a vector, so we may use - // arithmetical operations with iterators. - DWARFDebugLine::Row row; - row.Address = address; - RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex; - RowIter last_row = Rows.begin() + cur_seq.LastRowIndex; - RowIter row_pos = std::upper_bound(first_row, last_row, row, - DWARFDebugLine::Row::orderByAddress); - // The 'row_pos' iterator references the first row that is greater than - // our start address. Unless that's the first row, we want to start at - // the row before that. - first_row_index = cur_seq.FirstRowIndex + (row_pos - first_row); - if (row_pos != first_row) - --first_row_index; - } else - first_row_index = cur_seq.FirstRowIndex; - - // For the last sequence in our range, we need to figure out the last row in - // range. For all other sequences we can go to the end of the sequence. - if (cur_seq.HighPC > end_addr) { - DWARFDebugLine::Row row; - row.Address = end_addr; - RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex; - RowIter last_row = Rows.begin() + cur_seq.LastRowIndex; - RowIter row_pos = std::upper_bound(first_row, last_row, row, - DWARFDebugLine::Row::orderByAddress); - // The 'row_pos' iterator references the first row that is greater than - // our end address. The row before that is the last row we want. - last_row_index = cur_seq.FirstRowIndex + (row_pos - first_row) - 1; - } else - // Contrary to what you might expect, DWARFDebugLine::SequenceLastRowIndex - // isn't a valid index within the current sequence. It's that plus one. + const DWARFDebugLine::Sequence &cur_seq = *seq_pos; + // For the first sequence, we need to find which row in the sequence is the + // first in our range. + uint32_t first_row_index = cur_seq.FirstRowIndex; + if (seq_pos == start_pos) + first_row_index = findRowInSeq(cur_seq, address); + + // Figure out the last row in the range. 
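// (end_addr is exclusive, hence the probe at end_addr - 1 below; if that
// address lies beyond the sequence, the code falls back to the sequence's
// last row.)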
+ uint32_t last_row_index = findRowInSeq(cur_seq, end_addr - 1); + if (last_row_index == UnknownRowIndex) last_row_index = cur_seq.LastRowIndex - 1; + assert(first_row_index != UnknownRowIndex); + assert(last_row_index != UnknownRowIndex); + for (uint32_t i = first_row_index; i <= last_row_index; ++i) { result.push_back(i); } diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index d7038fd2c9a8..9e71b108280b 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -438,7 +438,7 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, if (NumArgs > 2) { std::vector<std::string> EnvVars; for (unsigned i = 0; envp[i]; ++i) - EnvVars.push_back(envp[i]); + EnvVars.emplace_back(envp[i]); // Arg #2 = envp. GVArgs.push_back(PTOGV(CEnv.reset(Fn->getContext(), this, EnvVars))); } diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index ce1ab594dfa3..55ab5af2b909 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetOptions.h" #include <cstring> using namespace llvm; @@ -254,11 +255,8 @@ int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F, unsigned ArgC, const char * const *ArgV, const char * const *EnvP) { unwrap(EE)->finalizeObject(); - - std::vector<std::string> ArgVec; - for (unsigned I = 0; I != ArgC; ++I) - ArgVec.push_back(ArgV[I]); - + + std::vector<std::string> ArgVec(ArgV, ArgV + ArgC); return unwrap(EE)->runFunctionAsMain(unwrap<Function>(F), ArgVec, EnvP); } diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index ec67019947ab..08d9d6b05a0a 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -119,10 +119,9 @@ void IntelJITEventListener::NotifyObjectEmitted( if (SymType == SymbolRef::ST_Function) { StringRef Name; uint64_t Addr; - uint64_t Size; if (I->getName(Name)) continue; if (I->getAddress(Addr)) continue; - if (I->getSize(Size)) continue; + uint64_t Size = I->getSize(); // Record this address in a local vector Functions.push_back((void*)Addr); diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index a26740b0da8a..39a8027005f8 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -2079,7 +2079,7 @@ void Interpreter::callFunction(Function *F, ECStack.back().Caller.arg_size() == ArgVals.size()) && "Incorrect number of arguments passed into function call!"); // Make a new stack frame... and fill it in. 
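// A cleanup repeated throughout this import: push_back(T()) constructs a
// temporary and then moves or copies it into the vector, while
// emplace_back() constructs the element in place. For a frame object like
// ExecutionContext, which owns several containers, that saves a full move
// of every member per call.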
- ECStack.push_back(ExecutionContext()); + ECStack.emplace_back(); ExecutionContext &StackFrame = ECStack.back(); StackFrame.CurFunction = F; diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt index 8509852b652e..9d29a41f5041 100644 --- a/lib/ExecutionEngine/LLVMBuild.txt +++ b/lib/ExecutionEngine/LLVMBuild.txt @@ -22,4 +22,4 @@ subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc type = Library name = ExecutionEngine parent = Libraries -required_libraries = Core MC Object RuntimeDyld Support +required_libraries = Core MC Object RuntimeDyld Support Target diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 24a3ec19aed4..6d64d6837430 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -120,7 +120,7 @@ static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) { if (Address == UnknownAddressOrSize) { Result = UnknownAddressOrSize; - return object_error::success; + return std::error_code(); } const ObjectFile *Obj = Sym.getObject(); @@ -130,12 +130,12 @@ static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) { if (SecI == Obj->section_end()) { Result = UnknownAddressOrSize; - return object_error::success; + return std::error_code(); } uint64_t SectionAddress = SecI->getAddress(); Result = Address - SectionAddress; - return object_error::success; + return std::error_code(); } std::pair<unsigned, unsigned> @@ -149,6 +149,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { // Save information about our target Arch = (Triple::ArchType)Obj.getArch(); IsTargetLittleEndian = Obj.isLittleEndian(); + setMipsABI(Obj); // Compute the memory size required to load all sections to be loaded // and pass this information to the memory manager @@ -386,8 +387,7 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, uint32_t Flags = I->getFlags(); if (Flags & SymbolRef::SF_Common) { // Add the common symbols to a list. We'll allocate them all below. - uint64_t Size = 0; - Check(I->getSize(Size)); + uint64_t Size = I->getSize(); CommonSize += Size; } } @@ -493,10 +493,8 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, continue; } - uint32_t Align = 0; - uint64_t Size = 0; - Check(Sym.getAlignment(Align)); - Check(Sym.getSize(Size)); + uint32_t Align = Sym.getAlignment(); + uint64_t Size = Sym.getSize(); CommonSize += Align + Size; SymbolsToAllocate.push_back(Sym); @@ -517,11 +515,9 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, // Assign the address of each symbol for (auto &Sym : SymbolsToAllocate) { - uint32_t Align; - uint64_t Size; + uint32_t Align = Sym.getAlignment(); StringRef Name; - Check(Sym.getAlignment(Align)); - Check(Sym.getSize(Size)); + uint64_t Size = Sym.getSize(); Check(Sym.getName(Name)); if (Align) { // This symbol has an alignment requirement. @@ -689,7 +685,7 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr, // and stubs for branches Thumb - ARM and ARM - Thumb. writeBytesUnaligned(0xe51ff004, Addr, 4); // ldr pc,<label> return Addr + 4; - } else if (Arch == Triple::mipsel || Arch == Triple::mips) { + } else if (IsMipsO32ABI) { // 0: 3c190000 lui t9,%hi(addr). // 4: 27390000 addiu t9,t9,%lo(addr). // 8: 03200008 jr t9. 
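The lui/addiu words of the O32 stub above are patched once the final address is known (via the R_MIPS_HI16/LO16 paths in RuntimeDyldELF.cpp below). A minimal standalone sketch of that split; the helper names are hypothetical, not RuntimeDyld API:
#include <cstdint>
// addiu sign-extends its 16-bit immediate, so %hi pre-adds 0x8000 to
// compensate when the two halves are recombined.
static inline uint32_t mipsHi16(uint32_t Addr) { return (Addr + 0x8000) >> 16; }
static inline uint32_t mipsLo16(uint32_t Addr) { return Addr & 0xffff; }
// e.g. Addr = 0x18000: lui t9, 0x2; addiu t9, t9, 0x8000 (== -0x8000),
// giving t9 = 0x20000 - 0x8000 = 0x18000 as required.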
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 95421b35db5c..b4a34e8acf3e 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -157,16 +157,16 @@ OwningBinary<ObjectFile> createELFDebugObject(const ObjectFile &Obj, std::unique_ptr<ObjectFile> DebugObj; if (Obj.getBytesInAddress() == 4 && Obj.isLittleEndian()) { - typedef ELFType<support::little, 2, false> ELF32LE; + typedef ELFType<support::little, false> ELF32LE; DebugObj = createRTDyldELFObject<ELF32LE>(Buffer->getMemBufferRef(), L, ec); } else if (Obj.getBytesInAddress() == 4 && !Obj.isLittleEndian()) { - typedef ELFType<support::big, 2, false> ELF32BE; + typedef ELFType<support::big, false> ELF32BE; DebugObj = createRTDyldELFObject<ELF32BE>(Buffer->getMemBufferRef(), L, ec); } else if (Obj.getBytesInAddress() == 8 && !Obj.isLittleEndian()) { - typedef ELFType<support::big, 2, true> ELF64BE; + typedef ELFType<support::big, true> ELF64BE; DebugObj = createRTDyldELFObject<ELF64BE>(Buffer->getMemBufferRef(), L, ec); } else if (Obj.getBytesInAddress() == 8 && Obj.isLittleEndian()) { - typedef ELFType<support::little, 2, true> ELF64LE; + typedef ELFType<support::little, true> ELF64LE; DebugObj = createRTDyldELFObject<ELF64LE>(Buffer->getMemBufferRef(), L, ec); } else llvm_unreachable("Unexpected ELF format"); @@ -477,34 +477,243 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend) { - uint32_t *TargetPtr = (uint32_t *)(Section.Address + Offset); + uint8_t *TargetPtr = Section.Address + Offset; Value += Addend; - DEBUG(dbgs() << "resolveMipselocation, LocalAddress: " + DEBUG(dbgs() << "resolveMIPSRelocation, LocalAddress: " << Section.Address + Offset << " FinalAddress: " << format("%p", Section.LoadAddress + Offset) << " Value: " << format("%x", Value) << " Type: " << format("%x", Type) << " Addend: " << format("%x", Addend) << "\n"); + uint32_t Insn = readBytesUnaligned(TargetPtr, 4); + switch (Type) { default: llvm_unreachable("Not implemented relocation type!"); break; case ELF::R_MIPS_32: - *TargetPtr = Value; + writeBytesUnaligned(Value, TargetPtr, 4); break; case ELF::R_MIPS_26: - *TargetPtr = ((*TargetPtr) & 0xfc000000) | ((Value & 0x0fffffff) >> 2); + Insn &= 0xfc000000; + Insn |= (Value & 0x0fffffff) >> 2; + writeBytesUnaligned(Insn, TargetPtr, 4); break; case ELF::R_MIPS_HI16: // Get the higher 16-bits. Also add 1 if bit 15 is 1. 
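// (Adding 0x8000 before the shift implements that carry: it offsets the
// sign-extension of the paired R_MIPS_LO16 half when the two are added
// back together at run time.)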
- *TargetPtr = - ((*TargetPtr) & 0xffff0000) | (((Value + 0x8000) >> 16) & 0xffff); + Insn &= 0xffff0000; + Insn |= ((Value + 0x8000) >> 16) & 0xffff; + writeBytesUnaligned(Insn, TargetPtr, 4); break; case ELF::R_MIPS_LO16: - *TargetPtr = ((*TargetPtr) & 0xffff0000) | (Value & 0xffff); + Insn &= 0xffff0000; + Insn |= Value & 0xffff; + writeBytesUnaligned(Insn, TargetPtr, 4); break; + case ELF::R_MIPS_PC32: + uint32_t FinalAddress = (Section.LoadAddress + Offset); + writeBytesUnaligned(Value + Addend - FinalAddress, (uint8_t *)TargetPtr, 4); + break; + } +} + +void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { + if (!StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) { + IsMipsO32ABI = false; + IsMipsN64ABI = false; + return; + } + unsigned AbiVariant; + Obj.getPlatformFlags(AbiVariant); + IsMipsO32ABI = AbiVariant & ELF::EF_MIPS_ABI_O32; + IsMipsN64ABI = Obj.getFileFormatName().equals("ELF64-mips"); + if (AbiVariant & ELF::EF_MIPS_ABI2) + llvm_unreachable("Mips N32 ABI is not supported yet"); +} + +void RuntimeDyldELF::resolveMIPS64Relocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, + uint32_t Type, int64_t Addend, + uint64_t SymOffset, + SID SectionID) { + uint32_t r_type = Type & 0xff; + uint32_t r_type2 = (Type >> 8) & 0xff; + uint32_t r_type3 = (Type >> 16) & 0xff; + + // RelType is used to keep information for which relocation type we are + // applying relocation. + uint32_t RelType = r_type; + int64_t CalculatedValue = evaluateMIPS64Relocation(Section, Offset, Value, + RelType, Addend, + SymOffset, SectionID); + if (r_type2 != ELF::R_MIPS_NONE) { + RelType = r_type2; + CalculatedValue = evaluateMIPS64Relocation(Section, Offset, 0, RelType, + CalculatedValue, SymOffset, + SectionID); + } + if (r_type3 != ELF::R_MIPS_NONE) { + RelType = r_type3; + CalculatedValue = evaluateMIPS64Relocation(Section, Offset, 0, RelType, + CalculatedValue, SymOffset, + SectionID); } + applyMIPS64Relocation(Section.Address + Offset, CalculatedValue, RelType); +} + +int64_t +RuntimeDyldELF::evaluateMIPS64Relocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, + uint32_t Type, int64_t Addend, + uint64_t SymOffset, SID SectionID) { + + DEBUG(dbgs() << "evaluateMIPS64Relocation, LocalAddress: 0x" + << format("%llx", Section.Address + Offset) + << " FinalAddress: 0x" + << format("%llx", Section.LoadAddress + Offset) + << " Value: 0x" << format("%llx", Value) << " Type: 0x" + << format("%x", Type) << " Addend: 0x" << format("%llx", Addend) + << " SymOffset: " << format("%x", SymOffset) + << "\n"); + + switch (Type) { + default: + llvm_unreachable("Not implemented relocation type!"); + break; + case ELF::R_MIPS_JALR: + case ELF::R_MIPS_NONE: + break; + case ELF::R_MIPS_32: + case ELF::R_MIPS_64: + return Value + Addend; + case ELF::R_MIPS_26: + return ((Value + Addend) >> 2) & 0x3ffffff; + case ELF::R_MIPS_GPREL16: { + uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); + return Value + Addend - (GOTAddr + 0x7ff0); + } + case ELF::R_MIPS_SUB: + return Value - Addend; + case ELF::R_MIPS_HI16: + // Get the higher 16-bits. Also add 1 if bit 15 is 1. 
+ return ((Value + Addend + 0x8000) >> 16) & 0xffff; + case ELF::R_MIPS_LO16: + return (Value + Addend) & 0xffff; + case ELF::R_MIPS_CALL16: + case ELF::R_MIPS_GOT_DISP: + case ELF::R_MIPS_GOT_PAGE: { + uint8_t *LocalGOTAddr = + getSectionAddress(SectionToGOTMap[SectionID]) + SymOffset; + uint64_t GOTEntry = readBytesUnaligned(LocalGOTAddr, 8); + + Value += Addend; + if (Type == ELF::R_MIPS_GOT_PAGE) + Value = (Value + 0x8000) & ~0xffff; + + if (GOTEntry) + assert(GOTEntry == Value && + "GOT entry has two different addresses."); + else + writeBytesUnaligned(Value, LocalGOTAddr, 8); + + return (SymOffset - 0x7ff0) & 0xffff; + } + case ELF::R_MIPS_GOT_OFST: { + int64_t page = (Value + Addend + 0x8000) & ~0xffff; + return (Value + Addend - page) & 0xffff; + } + case ELF::R_MIPS_GPREL32: { + uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); + return Value + Addend - (GOTAddr + 0x7ff0); + } + case ELF::R_MIPS_PC16: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + return ((Value + Addend - FinalAddress - 4) >> 2) & 0xffff; + } + case ELF::R_MIPS_PC32: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + return Value + Addend - FinalAddress; + } + case ELF::R_MIPS_PC18_S3: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + return ((Value + Addend - ((FinalAddress | 7) ^ 7)) >> 3) & 0x3ffff; + } + case ELF::R_MIPS_PC19_S2: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + return ((Value + Addend - FinalAddress) >> 2) & 0x7ffff; + } + case ELF::R_MIPS_PC21_S2: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + return ((Value + Addend - FinalAddress) >> 2) & 0x1fffff; + } + case ELF::R_MIPS_PC26_S2: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + return ((Value + Addend - FinalAddress) >> 2) & 0x3ffffff; + } + case ELF::R_MIPS_PCHI16: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + return ((Value + Addend - FinalAddress + 0x8000) >> 16) & 0xffff; + } + case ELF::R_MIPS_PCLO16: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + return (Value + Addend - FinalAddress) & 0xffff; + } + } + return 0; +} + +void RuntimeDyldELF::applyMIPS64Relocation(uint8_t *TargetPtr, + int64_t CalculatedValue, + uint32_t Type) { + uint32_t Insn = readBytesUnaligned(TargetPtr, 4); + + switch (Type) { + default: + break; + case ELF::R_MIPS_32: + case ELF::R_MIPS_GPREL32: + case ELF::R_MIPS_PC32: + writeBytesUnaligned(CalculatedValue & 0xffffffff, TargetPtr, 4); + break; + case ELF::R_MIPS_64: + case ELF::R_MIPS_SUB: + writeBytesUnaligned(CalculatedValue, TargetPtr, 8); + break; + case ELF::R_MIPS_26: + case ELF::R_MIPS_PC26_S2: + Insn = (Insn & 0xfc000000) | CalculatedValue; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + case ELF::R_MIPS_GPREL16: + Insn = (Insn & 0xffff0000) | (CalculatedValue & 0xffff); + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + case ELF::R_MIPS_HI16: + case ELF::R_MIPS_LO16: + case ELF::R_MIPS_PCHI16: + case ELF::R_MIPS_PCLO16: + case ELF::R_MIPS_PC16: + case ELF::R_MIPS_CALL16: + case ELF::R_MIPS_GOT_DISP: + case ELF::R_MIPS_GOT_PAGE: + case ELF::R_MIPS_GOT_OFST: + Insn = (Insn & 0xffff0000) | CalculatedValue; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + case ELF::R_MIPS_PC18_S3: + Insn = (Insn & 0xfffc0000) | CalculatedValue; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + case ELF::R_MIPS_PC19_S2: + Insn = (Insn & 0xfff80000) | CalculatedValue; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + case ELF::R_MIPS_PC21_S2: + Insn = (Insn & 0xffe00000) | 
CalculatedValue; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + } } // Return the .TOC. section and offset. @@ -521,17 +730,15 @@ void RuntimeDyldELF::findPPC64TOCSection(const ObjectFile &Obj, // The TOC consists of sections .got, .toc, .tocbss, .plt in that // order. The TOC starts where the first of these sections starts. - for (section_iterator si = Obj.section_begin(), se = Obj.section_end(); - si != se; ++si) { - + for (auto &Section: Obj.sections()) { StringRef SectionName; - check(si->getName(SectionName)); + check(Section.getName(SectionName)); if (SectionName == ".got" || SectionName == ".toc" || SectionName == ".tocbss" || SectionName == ".plt") { - Rel.SectionID = findOrEmitSection(Obj, *si, false, LocalSections); + Rel.SectionID = findOrEmitSection(Obj, Section, false, LocalSections); break; } } @@ -784,13 +991,13 @@ void RuntimeDyldELF::resolveRelocation(const RelocationEntry &RE, uint64_t Value) { const SectionEntry &Section = Sections[RE.SectionID]; return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend, - RE.SymOffset); + RE.SymOffset, RE.SectionID); } void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend, - uint64_t SymOffset) { + uint64_t SymOffset, SID SectionID) { switch (Arch) { case Triple::x86_64: resolveX86_64Relocation(Section, Offset, Value, Type, Addend, SymOffset); @@ -812,8 +1019,16 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, break; case Triple::mips: // Fall through. case Triple::mipsel: - resolveMIPSRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), - Type, (uint32_t)(Addend & 0xffffffffL)); + case Triple::mips64: + case Triple::mips64el: + if (IsMipsO32ABI) + resolveMIPSRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), + Type, (uint32_t)(Addend & 0xffffffffL)); + else if (IsMipsN64ABI) + resolveMIPS64Relocation(Section, Offset, Value, Type, Addend, SymOffset, + SectionID); + else + llvm_unreachable("Mips ABI not handled"); break; case Triple::ppc64: // Fall through. case Triple::ppc64le: @@ -999,8 +1214,10 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( } processSimpleRelocation(SectionID, Offset, RelType, Value); } - } else if ((Arch == Triple::mipsel || Arch == Triple::mips)) { - uint32_t *Placeholder = reinterpret_cast<uint32_t*>(computePlaceholderAddress(SectionID, Offset)); + } else if (IsMipsO32ABI) { + uint8_t *Placeholder = reinterpret_cast<uint8_t *>( + computePlaceholderAddress(SectionID, Offset)); + uint32_t Opcode = readBytesUnaligned(Placeholder, 4); if (RelType == ELF::R_MIPS_26) { // This is an Mips branch relocation, need to use a stub function. DEBUG(dbgs() << "\t\tThis is a Mips branch relocation."); @@ -1009,7 +1226,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // Extract the addend from the instruction. // We shift up by two since the Value will be down shifted again // when applying the relocation. 
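// A J-format branch stores its word-aligned target in the low 26 bits,
// already shifted right by 2; an embedded addend field of 0x4 therefore
// means byte offset 0x10, recovered as (Opcode & 0x03ffffff) << 2.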
- uint32_t Addend = ((*Placeholder) & 0x03ffffff) << 2; + uint32_t Addend = (Opcode & 0x03ffffff) << 2; Value.Addend += Addend; @@ -1047,13 +1264,30 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( } } else { if (RelType == ELF::R_MIPS_HI16) - Value.Addend += ((*Placeholder) & 0x0000ffff) << 16; + Value.Addend += (Opcode & 0x0000ffff) << 16; else if (RelType == ELF::R_MIPS_LO16) - Value.Addend += ((*Placeholder) & 0x0000ffff); + Value.Addend += (Opcode & 0x0000ffff); else if (RelType == ELF::R_MIPS_32) - Value.Addend += *Placeholder; + Value.Addend += Opcode; processSimpleRelocation(SectionID, Offset, RelType, Value); } + } else if (IsMipsN64ABI) { + uint32_t r_type = RelType & 0xff; + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); + if (r_type == ELF::R_MIPS_CALL16 || r_type == ELF::R_MIPS_GOT_PAGE + || r_type == ELF::R_MIPS_GOT_DISP) { + StringMap<uint64_t>::iterator i = GOTSymbolOffsets.find(TargetName); + if (i != GOTSymbolOffsets.end()) + RE.SymOffset = i->second; + else { + RE.SymOffset = allocateGOTEntries(SectionID, 1); + GOTSymbolOffsets[TargetName] = RE.SymOffset; + } + } + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); } else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) { if (RelType == ELF::R_PPC64_REL24) { // Determine ABI variant in use for this object. @@ -1356,9 +1590,18 @@ size_t RuntimeDyldELF::getGOTEntrySize() { case Triple::x86: case Triple::arm: case Triple::thumb: + Result = sizeof(uint32_t); + break; case Triple::mips: case Triple::mipsel: - Result = sizeof(uint32_t); + case Triple::mips64: + case Triple::mips64el: + if (IsMipsO32ABI) + Result = sizeof(uint32_t); + else if (IsMipsN64ABI) + Result = sizeof(uint64_t); + else + llvm_unreachable("Mips ABI not handled"); break; default: llvm_unreachable("Unsupported CPU type!"); @@ -1413,6 +1656,20 @@ void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, // For now, initialize all GOT entries to zero. We'll fill them in as // needed when GOT-based relocations are applied. memset(Addr, 0, TotalSize); + if (IsMipsN64ABI) { + // To correctly resolve Mips GOT relocations, we need a mapping from + // object's sections to GOTs. + for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); + SI != SE; ++SI) { + if (SI->relocation_begin() != SI->relocation_end()) { + section_iterator RelocatedSection = SI->getRelocatedSection(); + ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection); + assert (i != SectionMap.end()); + SectionToGOTMap[i->second] = GOTSectionID; + } + } + GOTSymbolOffsets.clear(); + } } // Look for and record the EH frame section. 
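The 0x7ff0 bias recurring in the new MIPS64 GOT code above is the conventional distance between the GOT base and $gp: parking $gp 0x7ff0 bytes into the GOT lets a load's signed 16-bit displacement span nearly the full 64 KiB of entries. Illustrative arithmetic only:
// N64 GOT entries are 8 bytes, so entry #k sits at GOTBase + 8*k.
// With gp = GOTBase + 0x7ff0, its displacement is 8*k - 0x7ff0, which is
// what (SymOffset - 0x7ff0) & 0xffff encodes in evaluateMIPS64Relocation:
//   k = 0      -> disp = -0x7ff0 (>= -0x8000, reachable)
//   k = 0x1ffd -> disp =  0x7ff8 (<=  0x7fff, reachable)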
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 9a4a8630e4c4..3a377a2e162c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -25,7 +25,7 @@ class RuntimeDyldELF : public RuntimeDyldImpl { void resolveRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend, - uint64_t SymOffset = 0); + uint64_t SymOffset = 0, SID SectionID = 0); void resolveX86_64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend, @@ -49,12 +49,24 @@ class RuntimeDyldELF : public RuntimeDyldImpl { void resolveSystemZRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); + void resolveMIPS64Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend, + uint64_t SymOffset, SID SectionID); + + int64_t evaluateMIPS64Relocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, + uint32_t Type, int64_t Addend, + uint64_t SymOffset, SID SectionID); + + void applyMIPS64Relocation(uint8_t *TargetPtr, int64_t CalculatedValue, + uint32_t Type); + unsigned getMaxStubSize() override { if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) return 20; // movz; movk; movk; movk; br if (Arch == Triple::arm || Arch == Triple::thumb) return 8; // 32-bit instruction and 32-bit address - else if (Arch == Triple::mipsel || Arch == Triple::mips) + else if (IsMipsO32ABI) return 16; else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) return 44; @@ -73,6 +85,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl { return 1; } + void setMipsABI(const ObjectFile &Obj) override; + void findPPC64TOCSection(const ObjectFile &Obj, ObjSectionToIDMap &LocalSections, RelocationValueRef &Rel); @@ -114,6 +128,13 @@ class RuntimeDyldELF : public RuntimeDyldImpl { // that consume more than one slot) unsigned CurrentGOTIndex; + // A map from section to a GOT section that has entries for section's GOT + // relocations. (Mips64 specific) + DenseMap<SID, SID> SectionToGOTMap; + + // A map to avoid duplicate got entries (Mips64 specific) + StringMap<uint64_t> GOTSymbolOffsets; + // When a module is loaded we save the SectionID of the EH frame section // in a table until we receive a request to register all unregistered // EH frame sections with the memory manager. diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 90e61a50a1b0..e085a9296e8d 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -236,6 +236,8 @@ protected: Triple::ArchType Arch; bool IsTargetLittleEndian; + bool IsMipsO32ABI; + bool IsMipsN64ABI; // True if all sections should be passed to the memory manager, false if only // sections containing relocations should be. Defaults to 'false'. @@ -303,6 +305,11 @@ protected: *(Addr + 7) = Value & 0xFF; } + virtual void setMipsABI(const ObjectFile &Obj) { + IsMipsO32ABI = false; + IsMipsN64ABI = false; + } + /// Endian-aware read Read the least significant Size bytes from Src. 
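/// (These byte-wise accessors are why the MIPS paths above dropped their
/// direct *TargetPtr loads and stores: the JIT'd image need not share the
/// host's endianness or alignment, so instruction words are read, patched
/// and written back honoring IsTargetLittleEndian.)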
uint64_t readBytesUnaligned(uint8_t *Src, unsigned Size) const; diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h index 053f90ce05a1..dd454ae54f26 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h @@ -167,20 +167,19 @@ private: uint32_t SectionBID = findOrEmitSection(Obj, SectionB, IsCode, ObjSectionToID); - if (Addend != AddrA - AddrB) - Error("Unexpected SECTDIFF relocation addend."); + // Compute the addend 'C' from the original expression 'A - B + C'. + Addend -= AddrA - AddrB; DEBUG(dbgs() << "Found SECTDIFF: AddrA: " << AddrA << ", AddrB: " << AddrB << ", Addend: " << Addend << ", SectionA ID: " << SectionAID << ", SectionAOffset: " << SectionAOffset << ", SectionB ID: " << SectionBID << ", SectionBOffset: " << SectionBOffset << "\n"); - RelocationEntry R(SectionID, Offset, RelocType, 0, SectionAID, - SectionAOffset, SectionBID, SectionBOffset, IsPCRel, - Size); + RelocationEntry R(SectionID, Offset, RelocType, Addend, SectionAID, + SectionAOffset, SectionBID, SectionBOffset, + IsPCRel, Size); addRelocationForSection(R, SectionAID); - addRelocationForSection(R, SectionBID); return ++RelI; } diff --git a/lib/Fuzzer/FuzzerInterface.h b/lib/Fuzzer/FuzzerInterface.h index 8cf9962221c7..3fd807afcfeb 100644 --- a/lib/Fuzzer/FuzzerInterface.h +++ b/lib/Fuzzer/FuzzerInterface.h @@ -69,12 +69,12 @@ class UserSuppliedFuzzer { /// Executes the target function on 'Size' bytes of 'Data'. virtual void TargetFunction(const uint8_t *Data, size_t Size) = 0; /// Mutates 'Size' bytes of data in 'Data' inplace into up to 'MaxSize' bytes, - /// returns the new size of the data. + /// returns the new size of the data, which should be positive. virtual size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { return BasicMutate(Data, Size, MaxSize); } /// Crosses 'Data1' and 'Data2', writes up to 'MaxOutSize' bytes into Out, - /// returns the number of bytes written. + /// returns the number of bytes written, which should be positive. 
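/// (FuzzerLoop enforces that contract with the strengthened asserts below:
/// every Mutate/CrossOver result must be a non-empty unit no larger than
/// Options.MaxLen.)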
virtual size_t CrossOver(const uint8_t *Data1, size_t Size1,
const uint8_t *Data2, size_t Size2,
uint8_t *Out, size_t MaxOutSize) {
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 4c8b247a72ed..9ef47583cbb9 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -289,7 +289,9 @@ void Fuzzer::MutateAndTestOne(Unit *U) {
size_t Size = U->size();
U->resize(Options.MaxLen);
size_t NewSize = USF.Mutate(U->data(), Size, U->size());
- assert(NewSize > 0 && NewSize <= (size_t)Options.MaxLen);
+ assert(NewSize > 0 && "Mutator returned empty unit");
+ assert(NewSize <= (size_t)Options.MaxLen &&
+ "Mutator returned oversized unit");
U->resize(NewSize);
RunOneAndUpdateCorpus(*U);
size_t NumTraceBasedMutations = StopTraceRecording();
@@ -317,7 +319,9 @@ void Fuzzer::Loop(size_t NumIterations) {
size_t NewSize = USF.CrossOver(
Corpus[J1].data(), Corpus[J1].size(), Corpus[J2].data(),
Corpus[J2].size(), CurrentUnit.data(), CurrentUnit.size());
- assert(NewSize > 0 && NewSize <= (size_t)Options.MaxLen);
+ assert(NewSize > 0 && "CrossOver returned empty unit");
+ assert(NewSize <= (size_t)Options.MaxLen &&
+ "CrossOver returned oversized unit");
CurrentUnit.resize(NewSize);
MutateAndTestOne(&CurrentUnit);
}
diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp
index ddb0764930fe..b2e1e956dfcf 100644
--- a/lib/Fuzzer/FuzzerTraceState.cpp
+++ b/lib/Fuzzer/FuzzerTraceState.cpp
@@ -332,7 +332,7 @@ extern "C" {
void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
uint64_t Arg2, dfsan_label L0,
dfsan_label L1, dfsan_label L2) {
- assert(TS);
+ if (!TS) return;
assert(L0 == 0);
uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
uint64_t CmpSize = (SizeAndType >> 32) / 8;
@@ -343,7 +343,7 @@
void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
size_t n, dfsan_label s1_label,
dfsan_label s2_label, dfsan_label n_label) {
- assert(TS);
+ if (!TS) return;
uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
uint64_t S1 = 0, S2 = 0;
// Simplification: handle only first 8 bytes.
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index 1089cb582180..0744fdf40157 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -2140,27 +2140,20 @@ void AssemblyWriter::printModule(const Module *M) {
Out << "target triple = \"" << M->getTargetTriple() << "\"\n";
if (!M->getModuleInlineAsm().empty()) {
- // Split the string into lines, to make it easier to read the .ll file.
- std::string Asm = M->getModuleInlineAsm();
- size_t CurPos = 0;
- size_t NewLine = Asm.find_first_of('\n', CurPos);
Out << '\n';
- while (NewLine != std::string::npos) {
+
+ // Split the string into lines, to make it easier to read the .ll file.
+ StringRef Asm = M->getModuleInlineAsm();
+ do {
+ StringRef Front;
+ std::tie(Front, Asm) = Asm.split('\n');
+
// We found a newline, print the portion of the asm string from the
// last newline up to this newline.
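// StringRef::split('\n') returns the piece before the first '\n' and the
// remainder after it; if no newline remains it returns (Asm, ""). The
// do/while therefore also prints a final unterminated line:
//   "a\nb" -> ("a", "b"), then ("b", "") -> both pieces printed, loop ends.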
Out << "module asm \""; - PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine), - Out); - Out << "\"\n"; - CurPos = NewLine+1; - NewLine = Asm.find_first_of('\n', CurPos); - } - std::string rest(Asm.begin()+CurPos, Asm.end()); - if (!rest.empty()) { - Out << "module asm \""; - PrintEscapedString(rest, Out); + PrintEscapedString(Front, Out); Out << "\"\n"; - } + } while (!Asm.empty()); } printTypeIdentities(); @@ -2215,15 +2208,13 @@ void AssemblyWriter::printModule(const Module *M) { } } -void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) { - Out << '!'; - StringRef Name = NMD->getName(); +static void printMetadataIdentifier(StringRef Name, + formatted_raw_ostream &Out) { if (Name.empty()) { Out << "<empty name> "; } else { - if (isalpha(static_cast<unsigned char>(Name[0])) || - Name[0] == '-' || Name[0] == '$' || - Name[0] == '.' || Name[0] == '_') + if (isalpha(static_cast<unsigned char>(Name[0])) || Name[0] == '-' || + Name[0] == '$' || Name[0] == '.' || Name[0] == '_') Out << Name[0]; else Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F); @@ -2236,9 +2227,15 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) { Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F); } } +} + +void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) { + Out << '!'; + printMetadataIdentifier(NMD->getName(), Out); Out << " = !{"; for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - if (i) Out << ", "; + if (i) + Out << ", "; int Slot = Machine.getMetadataSlot(NMD->getOperand(i)); if (Slot == -1) Out << "<badref>"; @@ -2248,7 +2245,6 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) { Out << "}\n"; } - static void PrintLinkage(GlobalValue::LinkageTypes LT, formatted_raw_ostream &Out) { switch (LT) { @@ -2268,7 +2264,6 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT, } } - static void PrintVisibility(GlobalValue::VisibilityTypes Vis, formatted_raw_ostream &Out) { switch (Vis) { @@ -3008,9 +3003,10 @@ void AssemblyWriter::printMetadataAttachments( for (const auto &I : MDs) { unsigned Kind = I.first; Out << Separator; - if (Kind < MDNames.size()) - Out << "!" << MDNames[Kind]; - else + if (Kind < MDNames.size()) { + Out << "!"; + printMetadataIdentifier(MDNames[Kind], Out); + } else Out << "!<unknown kind #" << Kind << ">"; Out << ' '; WriteAsOperandInternal(Out, I.second, &TypePrinter, &Machine, TheModule); diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 3f64c43e902d..2efc61242eac 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -2165,9 +2165,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C, // factored out into preceding dimensions. 
bool Unknown = false; SmallVector<Constant *, 8> NewIdxs; - Type *Ty = PointeeTy; - Type *Prev = C->getType(); - for (unsigned i = 1, e = Idxs.size(); i != e; + Type *Ty = C->getType(); + Type *Prev = nullptr; + for (unsigned i = 0, e = Idxs.size(); i != e; Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) { if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 9557cda5a9c8..d476434542ea 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -461,6 +461,11 @@ void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) { *Dest++ = wrap(*I); } +LLVMTypeRef LLVMStructGetTypeAtIndex(LLVMTypeRef StructTy, unsigned i) { + StructType *Ty = unwrap<StructType>(StructTy); + return wrap(Ty->getTypeAtIndex(i)); +} + LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) { return unwrap<StructType>(StructTy)->isPacked(); } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 1e9d9a5695dc..1478bffe7c35 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -1249,7 +1249,8 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI) OperandTraits<GetElementPtrInst>::op_end(this) - GEPI.getNumOperands(), GEPI.getNumOperands()), - SourceElementType(GEPI.SourceElementType) { + SourceElementType(GEPI.SourceElementType), + ResultElementType(GEPI.ResultElementType) { std::copy(GEPI.op_begin(), GEPI.op_end(), op_begin()); SubclassOptionalData = GEPI.SubclassOptionalData; } @@ -2120,7 +2121,7 @@ unsigned CastInst::isEliminableCastPair( { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToSI | { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // UIToFP +- firstOp { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // SIToFP | - { 99,99,99, 0, 0,99,99, 1, 0,99,99, 4, 0}, // FPTrunc | + { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // FPTrunc | { 99,99,99, 2, 2,99,99,10, 2,99,99, 4, 0}, // FPExt | { 1, 0, 0,99,99, 0, 0,99,99,99, 7, 3, 0}, // PtrToInt | { 99,99,99,99,99,99,99,99,99,11,99,15, 0}, // IntToPtr | diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 44c45326c12e..7bcd829f9f5e 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -240,15 +240,12 @@ void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) { // Metadata Kind Uniquing //===----------------------------------------------------------------------===// -/// getMDKindID - Return a unique non-zero ID for the specified metadata kind. +/// Return a unique non-zero ID for the specified metadata kind. unsigned LLVMContext::getMDKindID(StringRef Name) const { - assert(!std::isdigit(Name.front()) && - "Named metadata may not start with a digit"); - // If this is new, assign it its ID. 
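A side note on the idiom getMDKindID keeps using in the return statement that follows: inserting with the map's current size as the candidate value assigns fresh IDs densely while leaving existing entries untouched. A standalone sketch of that interning pattern (plain std::map stand-in for CustomMDKindNames):

  #include <map>
  #include <string>

  // Returns a stable, dense ID for Name: a new name gets the next unused ID
  // (the map's size before insertion); a known name keeps its original ID.
  static unsigned internName(std::map<std::string, unsigned> &IDs,
                             const std::string &Name) {
    return IDs.insert({Name, static_cast<unsigned>(IDs.size())}).first->second;
  }
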
- return pImpl->CustomMDKindNames.insert(std::make_pair( - Name, - pImpl->CustomMDKindNames.size())) + return pImpl->CustomMDKindNames.insert( + std::make_pair( + Name, pImpl->CustomMDKindNames.size())) .first->second; } diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index f81db6077704..3a573362b411 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -922,6 +922,8 @@ public: DenseMap<Value *, ValueAsMetadata *> ValuesAsMetadata; DenseMap<Metadata *, MetadataAsValue *> MetadataAsValues; + DenseMap<const Value*, ValueName*> ValueNames; + #define HANDLE_MDNODE_LEAF(CLASS) DenseSet<CLASS *, CLASS##Info> CLASS##s; #include "llvm/IR/Metadata.def" diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index bbec6426c5d9..27d98a279fe2 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -293,10 +293,8 @@ public: // Delete on the fly managers. ~MPPassManager() override { - for (std::map<Pass *, FunctionPassManagerImpl *>::iterator - I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end(); - I != E; ++I) { - FunctionPassManagerImpl *FPP = I->second; + for (auto &OnTheFlyManager : OnTheFlyManagers) { + FunctionPassManagerImpl *FPP = OnTheFlyManager.second; delete FPP; } } @@ -465,9 +463,8 @@ public: ~TimingInfo() { // Delete all of the timers, which accumulate their info into the // TimerGroup. - for (DenseMap<Pass*, Timer*>::iterator I = TimingData.begin(), - E = TimingData.end(); I != E; ++I) - delete I->second; + for (auto &I : TimingData) + delete I.second; // TimerGroup is deleted next, printing the report. } @@ -510,9 +507,7 @@ PMTopLevelManager::setLastUser(ArrayRef<Pass*> AnalysisPasses, Pass *P) { if (P->getResolver()) PDepth = P->getResolver()->getPMDataManager().getDepth(); - for (SmallVectorImpl<Pass *>::const_iterator I = AnalysisPasses.begin(), - E = AnalysisPasses.end(); I != E; ++I) { - Pass *AP = *I; + for (Pass *AP : AnalysisPasses) { LastUser[AP] = P; if (P == AP) @@ -693,22 +688,19 @@ void PMTopLevelManager::schedulePass(Pass *P) { Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { // Check pass managers - for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(), - E = PassManagers.end(); I != E; ++I) - if (Pass *P = (*I)->findAnalysisPass(AID, false)) + for (PMDataManager *PassManager : PassManagers) + if (Pass *P = PassManager->findAnalysisPass(AID, false)) return P; // Check other pass managers - for (SmallVectorImpl<PMDataManager *>::iterator - I = IndirectPassManagers.begin(), - E = IndirectPassManagers.end(); I != E; ++I) - if (Pass *P = (*I)->findAnalysisPass(AID, false)) + for (PMDataManager *IndirectPassManager : IndirectPassManagers) + if (Pass *P = IndirectPassManager->findAnalysisPass(AID, false)) return P; // Check the immutable passes. Iterate in reverse order so that we find // the most recently registered passes first. 
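The LegacyPassManager hunks here are mechanical modernizations from explicit iterator loops to range-based for; behavior is unchanged. The destructor cleanup pattern, reduced to a standalone sketch (plain std::map stand-in for the pass-to-manager maps):

  #include <map>

  struct FPMImpl { /* owned manager object */ };

  // Each loop variable is a reference to the map's key/value pair, so
  // Entry.second is the owned pointer being deleted.
  static void destroyManagers(std::map<int, FPMImpl *> &Managers) {
    for (auto &Entry : Managers)
      delete Entry.second;
    Managers.clear();
  }
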
- for (SmallVectorImpl<ImmutablePass *>::reverse_iterator I = - ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) { + for (auto I = ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; + ++I) { AnalysisID PI = (*I)->getPassID(); if (PI == AID) return *I; @@ -718,11 +710,9 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { assert(PassInf && "Expected all immutable passes to be initialized"); const std::vector<const PassInfo*> &ImmPI = PassInf->getInterfacesImplemented(); - for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(), - EE = ImmPI.end(); II != EE; ++II) { - if ((*II)->getTypeInfo() == AID) + for (const PassInfo *PI : ImmPI) + if (PI->getTypeInfo() == AID) return *I; - } } return nullptr; @@ -754,9 +744,8 @@ void PMTopLevelManager::dumpPasses() const { // (sometimes indirectly), but there's no inheritance relationship // between PMDataManager and Pass, so we have to getAsPass to get // from a PMDataManager* to a Pass*. - for (SmallVectorImpl<PMDataManager *>::const_iterator I = - PassManagers.begin(), E = PassManagers.end(); I != E; ++I) - (*I)->getAsPass()->dumpPassStructure(1); + for (PMDataManager *Manager : PassManagers) + Manager->getAsPass()->dumpPassStructure(1); } void PMTopLevelManager::dumpArguments() const { @@ -1426,11 +1415,8 @@ bool FunctionPassManagerImpl::doInitialization(Module &M) { dumpArguments(); dumpPasses(); - SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses(); - for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(), - E = IPV.end(); I != E; ++I) { - Changed |= (*I)->doInitialization(M); - } + for (ImmutablePass *ImPass : getImmutablePasses()) + Changed |= ImPass->doInitialization(M); for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) Changed |= getContainedManager(Index)->doInitialization(M); @@ -1444,11 +1430,8 @@ bool FunctionPassManagerImpl::doFinalization(Module &M) { for (int Index = getNumContainedManagers() - 1; Index >= 0; --Index) Changed |= getContainedManager(Index)->doFinalization(M); - SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses(); - for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(), - E = IPV.end(); I != E; ++I) { - Changed |= (*I)->doFinalization(M); - } + for (ImmutablePass *ImPass : getImmutablePasses()) + Changed |= ImPass->doFinalization(M); return Changed; } @@ -1553,8 +1536,8 @@ bool FPPassManager::runOnFunction(Function &F) { bool FPPassManager::runOnModule(Module &M) { bool Changed = false; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - Changed |= runOnFunction(*I); + for (Function &F : M) + Changed |= runOnFunction(F); return Changed; } @@ -1588,10 +1571,8 @@ MPPassManager::runOnModule(Module &M) { bool Changed = false; // Initialize on-the-fly passes - for (std::map<Pass *, FunctionPassManagerImpl *>::iterator - I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end(); - I != E; ++I) { - FunctionPassManagerImpl *FPP = I->second; + for (auto &OnTheFlyManager : OnTheFlyManagers) { + FunctionPassManagerImpl *FPP = OnTheFlyManager.second; Changed |= FPP->doInitialization(M); } @@ -1632,10 +1613,8 @@ MPPassManager::runOnModule(Module &M) { Changed |= getContainedPass(Index)->doFinalization(M); // Finalize on-the-fly passes - for (std::map<Pass *, FunctionPassManagerImpl *>::iterator - I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end(); - I != E; ++I) { - FunctionPassManagerImpl *FPP = I->second; + for (auto &OnTheFlyManager : OnTheFlyManagers) { + FunctionPassManagerImpl *FPP = 
OnTheFlyManager.second; // We don't know when is the last time an on-the-fly pass is run, // so we need to releaseMemory / finalize here FPP->releaseMemoryOnTheFly(); @@ -1711,11 +1690,8 @@ bool PassManagerImpl::run(Module &M) { dumpArguments(); dumpPasses(); - SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses(); - for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(), - E = IPV.end(); I != E; ++I) { - Changed |= (*I)->doInitialization(M); - } + for (ImmutablePass *ImPass : getImmutablePasses()) + Changed |= ImPass->doInitialization(M); initializeAllAnalysisInfo(); for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) { @@ -1723,10 +1699,8 @@ bool PassManagerImpl::run(Module &M) { M.getContext().yield(); } - for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(), - E = IPV.end(); I != E; ++I) { - Changed |= (*I)->doFinalization(M); - } + for (ImmutablePass *ImPass : getImmutablePasses()) + Changed |= ImPass->doFinalization(M); return Changed; } @@ -1822,9 +1796,8 @@ void PMStack::push(PMDataManager *PM) { // Dump content of the pass manager stack. void PMStack::dump() const { - for (std::vector<PMDataManager *>::const_iterator I = S.begin(), - E = S.end(); I != E; ++I) - dbgs() << (*I)->getAsPass()->getPassName() << ' '; + for (PMDataManager *Manager : S) + dbgs() << Manager->getAsPass()->getPassName() << ' '; if (!S.empty()) dbgs() << '\n'; diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp index 354592df4c96..b4c5ca7c6a12 100644 --- a/lib/IR/MDBuilder.cpp +++ b/lib/IR/MDBuilder.cpp @@ -168,9 +168,16 @@ MDNode *MDBuilder::createTBAAScalarTypeNode(StringRef Name, MDNode *Parent, /// \brief Return metadata for a TBAA tag node with the given /// base type, access type and offset relative to the base type. MDNode *MDBuilder::createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType, - uint64_t Offset) { + uint64_t Offset, bool IsConstant) { Type *Int64 = Type::getInt64Ty(Context); - Metadata *Ops[3] = {BaseType, AccessType, - createConstant(ConstantInt::get(Int64, Offset))}; - return MDNode::get(Context, Ops); + if (IsConstant) { + Metadata *Ops[4] = {BaseType, AccessType, + createConstant(ConstantInt::get(Int64, Offset)), + createConstant(ConstantInt::get(Int64, 1))}; + return MDNode::get(Context, Ops); + } else { + Metadata *Ops[3] = {BaseType, AccessType, + createConstant(ConstantInt::get(Int64, Offset))}; + return MDNode::get(Context, Ops); + } } diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 23a17a52b2b8..75b4046ef442 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -256,9 +256,9 @@ ValueAsMetadata *ValueAsMetadata::get(Value *V) { if (!Entry) { assert((isa<Constant>(V) || isa<Argument>(V) || isa<Instruction>(V)) && "Expected constant or function-local value"); - assert(!V->NameAndIsUsedByMD.getInt() && + assert(!V->IsUsedByMD && "Expected this to be the only metadata use"); - V->NameAndIsUsedByMD.setInt(true); + V->IsUsedByMD = true; if (auto *C = dyn_cast<Constant>(V)) Entry = new ConstantAsMetadata(C); else @@ -302,15 +302,15 @@ void ValueAsMetadata::handleRAUW(Value *From, Value *To) { auto &Store = Context.pImpl->ValuesAsMetadata; auto I = Store.find(From); if (I == Store.end()) { - assert(!From->NameAndIsUsedByMD.getInt() && + assert(!From->IsUsedByMD && "Expected From not to be used by metadata"); return; } // Remove old entry from the map. 
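Earlier in this hunk, MDBuilder::createTBAAStructTagNode gains an IsConstant flag; when set, the tag carries a fourth i64 operand equal to 1 marking the access as constant. A usage sketch against the new signature (hypothetical helper name):

  #include <cstdint>
  #include "llvm/IR/MDBuilder.h"
  #include "llvm/IR/Metadata.h"

  // Builds !{BaseTy, AccessTy, i64 Offset, i64 1}; with IsConstant = false
  // the node keeps its old three-operand form.
  static llvm::MDNode *constStructTag(llvm::MDBuilder &B, llvm::MDNode *BaseTy,
                                      llvm::MDNode *AccessTy, uint64_t Offset) {
    return B.createTBAAStructTagNode(BaseTy, AccessTy, Offset,
                                     /*IsConstant=*/true);
  }
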
- assert(From->NameAndIsUsedByMD.getInt() && + assert(From->IsUsedByMD && "Expected From to be used by metadata"); - From->NameAndIsUsedByMD.setInt(false); + From->IsUsedByMD = false; ValueAsMetadata *MD = I->second; assert(MD && "Expected valid metadata"); assert(MD->getValue() == From && "Expected valid mapping"); @@ -346,9 +346,9 @@ void ValueAsMetadata::handleRAUW(Value *From, Value *To) { } // Update MD in place (and update the map entry). - assert(!To->NameAndIsUsedByMD.getInt() && + assert(!To->IsUsedByMD && "Expected this to be the only metadata use"); - To->NameAndIsUsedByMD.setInt(true); + To->IsUsedByMD = true; MD->V = To; Entry = MD; } diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index 1c405168ae21..b5c4e5d4c6d5 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -307,12 +307,13 @@ IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) { // Check for the built-in integer types switch (NumBits) { - case 1: return cast<IntegerType>(Type::getInt1Ty(C)); - case 8: return cast<IntegerType>(Type::getInt8Ty(C)); - case 16: return cast<IntegerType>(Type::getInt16Ty(C)); - case 32: return cast<IntegerType>(Type::getInt32Ty(C)); - case 64: return cast<IntegerType>(Type::getInt64Ty(C)); - default: + case 1: return cast<IntegerType>(Type::getInt1Ty(C)); + case 8: return cast<IntegerType>(Type::getInt8Ty(C)); + case 16: return cast<IntegerType>(Type::getInt16Ty(C)); + case 32: return cast<IntegerType>(Type::getInt32Ty(C)); + case 64: return cast<IntegerType>(Type::getInt64Ty(C)); + case 128: return cast<IntegerType>(Type::getInt128Ty(C)); + default: break; } diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index fd0ed31ccc62..dcf0ad50190f 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -46,8 +46,9 @@ static inline Type *checkType(Type *Ty) { } Value::Value(Type *ty, unsigned scid) - : VTy(checkType(ty)), UseList(nullptr), SubclassID(scid), HasValueHandle(0), - SubclassOptionalData(0), SubclassData(0), NumOperands(0) { + : VTy(checkType(ty)), UseList(nullptr), SubclassID(scid), + HasValueHandle(0), SubclassOptionalData(0), SubclassData(0), + NumOperands(0), IsUsedByMD(false), HasName(false) { // FIXME: Why isn't this in the subclass gunk?? // Note, we cannot call isa<CallInst> before the CallInst has been // constructed. @@ -157,11 +158,39 @@ static bool getSymTab(Value *V, ValueSymbolTable *&ST) { return false; } +ValueName *Value::getValueName() const { + if (!HasName) return nullptr; + + LLVMContext &Ctx = getContext(); + auto I = Ctx.pImpl->ValueNames.find(this); + assert(I != Ctx.pImpl->ValueNames.end() && + "No name entry found!"); + + return I->second; +} + +void Value::setValueName(ValueName *VN) { + LLVMContext &Ctx = getContext(); + + assert(HasName == Ctx.pImpl->ValueNames.count(this) && + "HasName bit out of sync!"); + + if (!VN) { + if (HasName) + Ctx.pImpl->ValueNames.erase(this); + HasName = false; + return; + } + + HasName = true; + Ctx.pImpl->ValueNames[this] = VN; +} + StringRef Value::getName() const { // Make sure the empty string is still a C string. For historical reasons, // some clients want to call .data() on the result and expect it to be null // terminated. 
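The getValueName/setValueName change just above moves name storage out of Value itself: the object keeps only a HasName bit and the actual entry lives in the context-owned ValueNames map. The flag-plus-side-table pattern, reduced to a sketch:

  #include <cassert>
  #include <map>
  #include <string>

  struct Ctx { std::map<const void *, std::string> Names; };

  // The object carries one bit; the name itself is stored in (and looked up
  // from) a map owned by the context, mirroring Value/LLVMContextImpl above.
  struct Obj {
    bool HasName = false;
    void setName(Ctx &C, const std::string &N) {
      C.Names[this] = N;
      HasName = true;
    }
    std::string getName(const Ctx &C) const {
      if (!HasName)
        return std::string();
      auto I = C.Names.find(this);
      assert(I != C.Names.end() && "No name entry found!");
      return I->second;
    }
  };
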
- if (!getValueName()) + if (!hasName()) return StringRef("", 0); return getValueName()->getKey(); } diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 3cf13a0ef2cd..716d66abf1c2 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -250,8 +250,8 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **name, return true; } -const void *LTOCodeGenerator::compileOptimized(size_t *length, - std::string &errMsg) { +std::unique_ptr<MemoryBuffer> +LTOCodeGenerator::compileOptimized(std::string &errMsg) { const char *name; if (!compileOptimizedToFile(&name, errMsg)) return nullptr; @@ -264,16 +264,11 @@ const void *LTOCodeGenerator::compileOptimized(size_t *length, sys::fs::remove(NativeObjectPath); return nullptr; } - NativeObjectFile = std::move(*BufferOrErr); // remove temp files sys::fs::remove(NativeObjectPath); - // return buffer, unless error - if (!NativeObjectFile) - return nullptr; - *length = NativeObjectFile->getBufferSize(); - return NativeObjectFile->getBufferStart(); + return std::move(*BufferOrErr); } @@ -289,16 +284,14 @@ bool LTOCodeGenerator::compile_to_file(const char **name, return compileOptimizedToFile(name, errMsg); } -const void* LTOCodeGenerator::compile(size_t *length, - bool disableInline, - bool disableGVNLoadPRE, - bool disableVectorization, - std::string &errMsg) { +std::unique_ptr<MemoryBuffer> +LTOCodeGenerator::compile(bool disableInline, bool disableGVNLoadPRE, + bool disableVectorization, std::string &errMsg) { if (!optimize(disableInline, disableGVNLoadPRE, disableVectorization, errMsg)) return nullptr; - return compileOptimized(length, errMsg); + return compileOptimized(errMsg); } bool LTOCodeGenerator::determineTarget(std::string &errMsg) { diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 3cee0c434942..13c5ca9561df 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -12,7 +12,6 @@ add_llvm_library(LLVMMC MCCodeGenInfo.cpp MCContext.cpp MCDwarf.cpp - MCELF.cpp MCELFObjectTargetWriter.cpp MCELFStreamer.cpp MCExpr.cpp @@ -36,6 +35,7 @@ add_llvm_library(LLVMMC MCStreamer.cpp MCSubtargetInfo.cpp MCSymbol.cpp + MCSymbolELF.cpp MCSymbolizer.cpp MCTargetOptions.cpp MCValue.cpp diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp index a723aa8fa409..f7649fba6e89 100644 --- a/lib/MC/ConstantPools.cpp +++ b/lib/MC/ConstantPools.cpp @@ -40,7 +40,7 @@ const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context, MCSymbol *CPEntryLabel = Context.createTempSymbol(); Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size)); - return MCSymbolRefExpr::Create(CPEntryLabel, Context); + return MCSymbolRefExpr::create(CPEntryLabel, Context); } bool ConstantPool::empty() { return Entries.empty(); } diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 18746d1aa786..0765937d0ea8 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -21,12 +21,11 @@ #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" -#include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Compression.h" @@ -71,23 +70,20 @@ public: class ELFObjectWriter : public MCObjectWriter { static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned 
Kind); - static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant); static uint64_t SymbolValue(const MCSymbol &Sym, const MCAsmLayout &Layout); - static bool isInSymtab(const MCAsmLayout &Layout, const MCSymbol &Symbol, + static bool isInSymtab(const MCAsmLayout &Layout, const MCSymbolELF &Symbol, bool Used, bool Renamed); - static bool isLocal(const MCSymbol &Symbol, bool isUsedInReloc); /// Helper struct for containing some precomputed information on symbols. struct ELFSymbolData { - const MCSymbol *Symbol; - uint64_t StringIndex; + const MCSymbolELF *Symbol; uint32_t SectionIndex; StringRef Name; // Support lexicographic sorting. bool operator<(const ELFSymbolData &RHS) const { - unsigned LHSType = MCELF::GetType(Symbol->getData()); - unsigned RHSType = MCELF::GetType(RHS.Symbol->getData()); + unsigned LHSType = Symbol->getType(); + unsigned RHSType = RHS.Symbol->getType(); if (LHSType == ELF::STT_SECTION && RHSType != ELF::STT_SECTION) return false; if (LHSType != ELF::STT_SECTION && RHSType == ELF::STT_SECTION) @@ -101,9 +97,7 @@ class ELFObjectWriter : public MCObjectWriter { /// The target specific ELF writer instance. std::unique_ptr<MCELFObjectTargetWriter> TargetObjectWriter; - SmallPtrSet<const MCSymbol *, 16> UsedInReloc; - SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc; - DenseMap<const MCSymbol *, const MCSymbol *> Renames; + DenseMap<const MCSymbolELF *, const MCSymbolELF *> Renames; llvm::DenseMap<const MCSectionELF *, std::vector<ELFRelocationEntry>> Relocations; @@ -113,15 +107,9 @@ class ELFObjectWriter : public MCObjectWriter { /// @{ StringTableBuilder StrTabBuilder; - std::vector<uint64_t> FileSymbolData; - std::vector<ELFSymbolData> LocalSymbolData; - std::vector<ELFSymbolData> ExternalSymbolData; - std::vector<ELFSymbolData> UndefinedSymbolData; /// @} - bool NeedsGOT; - // This holds the symbol table index of the last local symbol. unsigned LastLocalSymbolIndex; // This holds the .strtab section index. 
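The ELFSymbolData comparator introduced above encodes the symbol-table ordering later enforced by array_pod_sort: all named symbols sort before STT_SECTION symbols, and ties break lexicographically by name. The same rule as a standalone predicate:

  #include <string>
  #include <tuple>

  struct SymKey { bool IsSection; std::string Name; };

  // Non-section symbols first (false < true), then lexicographic by name.
  static bool symbolLess(const SymKey &L, const SymKey &R) {
    return std::tie(L.IsSection, L.Name) < std::tie(R.IsSection, R.Name);
  }
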
@@ -145,23 +133,17 @@ class ELFObjectWriter : public MCObjectWriter { return TargetObjectWriter->GetRelocType(Target, Fixup, IsPCRel); } + void align(unsigned Alignment); + public: ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_pwrite_stream &OS, bool IsLittleEndian) - : MCObjectWriter(OS, IsLittleEndian), TargetObjectWriter(MOTW), - NeedsGOT(false) {} + : MCObjectWriter(OS, IsLittleEndian), TargetObjectWriter(MOTW) {} void reset() override { - UsedInReloc.clear(); - WeakrefUsedInReloc.clear(); Renames.clear(); Relocations.clear(); StrTabBuilder.clear(); - FileSymbolData.clear(); - LocalSymbolData.clear(); - ExternalSymbolData.clear(); - UndefinedSymbolData.clear(); - NeedsGOT = false; SectionTable.clear(); MCObjectWriter::reset(); } @@ -170,9 +152,9 @@ class ELFObjectWriter : public MCObjectWriter { void WriteWord(uint64_t W) { if (is64Bit()) - Write64(W); + write64(W); else - Write32(W); + write32(W); } template <typename T> void write(T Val) { @@ -184,29 +166,23 @@ class ELFObjectWriter : public MCObjectWriter { void writeHeader(const MCAssembler &Asm); - void WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD, - const MCAsmLayout &Layout); + void writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex, + ELFSymbolData &MSD, const MCAsmLayout &Layout); // Start and end offset of each section typedef std::map<const MCSectionELF *, std::pair<uint64_t, uint64_t>> SectionOffsetsTy; - void writeSymbolTable(MCContext &Ctx, const MCAsmLayout &Layout, - SectionOffsetsTy &SectionOffsets); - bool shouldRelocateWithSymbol(const MCAssembler &Asm, const MCSymbolRefExpr *RefA, const MCSymbol *Sym, uint64_t C, unsigned Type) const; - void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, + void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) override; - uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm, - const MCSymbol *S); - // Map from a signature symbol to the group section index typedef DenseMap<const MCSymbol *, unsigned> RevGroupMapTy; @@ -217,17 +193,18 @@ class ELFObjectWriter : public MCObjectWriter { /// \param RevGroupMap - Maps a signature symbol to the group section. 
void computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, - const RevGroupMapTy &RevGroupMap); + const RevGroupMapTy &RevGroupMap, + SectionOffsetsTy &SectionOffsets); MCSectionELF *createRelocationSection(MCContext &Ctx, const MCSectionELF &Sec); const MCSectionELF *createStringTable(MCContext &Ctx); - void ExecutePostLayoutBinding(MCAssembler &Asm, + void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; - void writeSectionHeader(const MCAssembler &Asm, const MCAsmLayout &Layout, + void writeSectionHeader(const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, const SectionOffsetsTy &SectionOffsets); @@ -241,7 +218,7 @@ class ELFObjectWriter : public MCObjectWriter { void writeRelocations(const MCAssembler &Asm, const MCSectionELF &Sec); - bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, + bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, bool InSet, @@ -249,13 +226,18 @@ class ELFObjectWriter : public MCObjectWriter { bool isWeak(const MCSymbol &Sym) const override; - void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; void writeSection(const SectionIndexMapTy &SectionIndexMap, uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size, const MCSectionELF &Section); }; } +void ELFObjectWriter::align(unsigned Alignment) { + uint64_t Padding = OffsetToAlignment(OS.tell(), Alignment); + WriteZeros(Padding); +} + unsigned ELFObjectWriter::addToSectionTable(const MCSectionELF *Sec) { SectionTable.push_back(Sec); StrTabBuilder.add(Sec->getSectionName()); @@ -319,27 +301,6 @@ bool ELFObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; } -bool ELFObjectWriter::RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) { - switch (Variant) { - default: - return false; - case MCSymbolRefExpr::VK_GOT: - case MCSymbolRefExpr::VK_PLT: - case MCSymbolRefExpr::VK_GOTPCREL: - case MCSymbolRefExpr::VK_GOTOFF: - case MCSymbolRefExpr::VK_TPOFF: - case MCSymbolRefExpr::VK_TLSGD: - case MCSymbolRefExpr::VK_GOTTPOFF: - case MCSymbolRefExpr::VK_INDNTPOFF: - case MCSymbolRefExpr::VK_NTPOFF: - case MCSymbolRefExpr::VK_GOTNTPOFF: - case MCSymbolRefExpr::VK_TLSLDM: - case MCSymbolRefExpr::VK_DTPOFF: - case MCSymbolRefExpr::VK_TLSLD: - return true; - } -} - ELFObjectWriter::~ELFObjectWriter() {} @@ -353,54 +314,53 @@ void ELFObjectWriter::writeHeader(const MCAssembler &Asm) { // emitWord method behaves differently for ELF32 and ELF64, writing // 4 bytes in the former and 8 in the latter. - WriteBytes(ELF::ElfMagic); // e_ident[EI_MAG0] to e_ident[EI_MAG3] + writeBytes(ELF::ElfMagic); // e_ident[EI_MAG0] to e_ident[EI_MAG3] - Write8(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS] + write8(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS] // e_ident[EI_DATA] - Write8(isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB); + write8(isLittleEndian() ? 
ELF::ELFDATA2LSB : ELF::ELFDATA2MSB); - Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION] + write8(ELF::EV_CURRENT); // e_ident[EI_VERSION] // e_ident[EI_OSABI] - Write8(TargetObjectWriter->getOSABI()); - Write8(0); // e_ident[EI_ABIVERSION] + write8(TargetObjectWriter->getOSABI()); + write8(0); // e_ident[EI_ABIVERSION] WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD); - Write16(ELF::ET_REL); // e_type + write16(ELF::ET_REL); // e_type - Write16(TargetObjectWriter->getEMachine()); // e_machine = target + write16(TargetObjectWriter->getEMachine()); // e_machine = target - Write32(ELF::EV_CURRENT); // e_version + write32(ELF::EV_CURRENT); // e_version WriteWord(0); // e_entry, no entry point in .o file WriteWord(0); // e_phoff, no program header for .o WriteWord(0); // e_shoff = sec hdr table off in bytes // e_flags = whatever the target wants - Write32(Asm.getELFHeaderEFlags()); + write32(Asm.getELFHeaderEFlags()); // e_ehsize = ELF header size - Write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr)); + write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr)); - Write16(0); // e_phentsize = prog header entry size - Write16(0); // e_phnum = # prog header entries = 0 + write16(0); // e_phentsize = prog header entry size + write16(0); // e_phnum = # prog header entries = 0 // e_shentsize = Section header entry size - Write16(is64Bit() ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr)); + write16(is64Bit() ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr)); // e_shnum = # of section header ents - Write16(0); + write16(0); // e_shstrndx = Section # of '.shstrtab' assert(StringTableIndex < ELF::SHN_LORESERVE); - Write16(StringTableIndex); + write16(StringTableIndex); } uint64_t ELFObjectWriter::SymbolValue(const MCSymbol &Sym, const MCAsmLayout &Layout) { - MCSymbolData &Data = Sym.getData(); - if (Data.isCommon() && Data.isExternal()) - return Data.getCommonAlignment(); + if (Sym.isCommon() && Sym.isExternal()) + return Sym.getCommonAlignment(); uint64_t Res; if (!Layout.getSymbolOffset(Sym, Res)) @@ -412,22 +372,20 @@ uint64_t ELFObjectWriter::SymbolValue(const MCSymbol &Sym, return Res; } -void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, +void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { // The presence of symbol versions causes undefined symbols and // versions declared with @@@ to be renamed. - for (const MCSymbol &Alias : Asm.symbols()) { - MCSymbolData &OriginalData = Alias.getData(); - + for (const MCSymbol &A : Asm.symbols()) { + const auto &Alias = cast<MCSymbolELF>(A); // Not an alias. if (!Alias.isVariable()) continue; auto *Ref = dyn_cast<MCSymbolRefExpr>(Alias.getVariableValue()); if (!Ref) continue; - const MCSymbol &Symbol = Ref->getSymbol(); - MCSymbolData &SD = Asm.getSymbolData(Symbol); + const auto &Symbol = cast<MCSymbolELF>(Ref->getSymbol()); StringRef AliasName = Alias.getName(); size_t Pos = AliasName.find('@'); @@ -436,8 +394,8 @@ void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, // Aliases defined with .symvar copy the binding from the symbol they alias. // This is the first place we are able to copy this information. 
- OriginalData.setExternal(SD.isExternal()); - MCELF::SetBinding(OriginalData, MCELF::GetBinding(SD)); + Alias.setExternal(Symbol.isExternal()); + Alias.setBinding(Symbol.getBinding()); StringRef Rest = AliasName.substr(Pos); if (!Symbol.isUndefined() && !Rest.startswith("@@@")) @@ -487,40 +445,39 @@ static uint8_t mergeTypeForSet(uint8_t origType, uint8_t newType) { return Type; } -void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD, +void ELFObjectWriter::writeSymbol(SymbolTableWriter &Writer, + uint32_t StringIndex, ELFSymbolData &MSD, const MCAsmLayout &Layout) { - MCSymbolData &OrigData = MSD.Symbol->getData(); - assert((!OrigData.getFragment() || - (OrigData.getFragment()->getParent() == &MSD.Symbol->getSection())) && + const auto &Symbol = cast<MCSymbolELF>(*MSD.Symbol); + assert((!Symbol.getFragment() || + (Symbol.getFragment()->getParent() == &Symbol.getSection())) && "The symbol's section doesn't match the fragment's symbol"); - const MCSymbol *Base = Layout.getBaseSymbol(*MSD.Symbol); + const MCSymbolELF *Base = + cast_or_null<MCSymbolELF>(Layout.getBaseSymbol(Symbol)); // This has to be in sync with when computeSymbolTable uses SHN_ABS or // SHN_COMMON. - bool IsReserved = !Base || OrigData.isCommon(); + bool IsReserved = !Base || Symbol.isCommon(); // Binding and Type share the same byte as upper and lower nibbles - uint8_t Binding = MCELF::GetBinding(OrigData); - uint8_t Type = MCELF::GetType(OrigData); - MCSymbolData *BaseSD = nullptr; + uint8_t Binding = Symbol.getBinding(); + uint8_t Type = Symbol.getType(); if (Base) { - BaseSD = &Layout.getAssembler().getSymbolData(*Base); - Type = mergeTypeForSet(Type, MCELF::GetType(*BaseSD)); + Type = mergeTypeForSet(Type, Base->getType()); } - uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift); + uint8_t Info = (Binding << 4) | Type; // Other and Visibility share the same byte with Visibility using the lower // 2 bits - uint8_t Visibility = MCELF::GetVisibility(OrigData); - uint8_t Other = MCELF::getOther(OrigData) << (ELF_STO_Shift - ELF_STV_Shift); - Other |= Visibility; + uint8_t Visibility = Symbol.getVisibility(); + uint8_t Other = Symbol.getOther() | Visibility; uint64_t Value = SymbolValue(*MSD.Symbol, Layout); uint64_t Size = 0; - const MCExpr *ESize = OrigData.getSize(); + const MCExpr *ESize = MSD.Symbol->getSize(); if (!ESize && Base) - ESize = BaseSD->getSize(); + ESize = Base->getSize(); if (ESize) { int64_t Res; @@ -530,78 +487,8 @@ void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD, } // Write out the symbol table entry - Writer.writeSymbol(MSD.StringIndex, Info, Value, Size, Other, - MSD.SectionIndex, IsReserved); -} - -void ELFObjectWriter::writeSymbolTable(MCContext &Ctx, - const MCAsmLayout &Layout, - SectionOffsetsTy &SectionOffsets) { - const MCSectionELF *SymtabSection = SectionTable[SymbolTableIndex - 1]; - - // The string table must be emitted first because we need the index - // into the string table for all the symbol names. - - SymbolTableWriter Writer(*this, is64Bit()); - - uint64_t Padding = - OffsetToAlignment(OS.tell(), SymtabSection->getAlignment()); - WriteZeros(Padding); - - uint64_t SecStart = OS.tell(); - - // The first entry is the undefined symbol entry. 
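writeSymbol above now assembles st_info inline. Per the ELF specification, that byte packs the binding into the high nibble and the type into the low nibble, exactly what the ELF32_ST_INFO/ELF64_ST_INFO macros compute:

  #include <cstdint>

  // st_info = (bind << 4) | (type & 0xf); e.g. STB_GLOBAL=1, STT_FUNC=2
  // yields 0x12.
  static uint8_t makeStInfo(uint8_t Binding, uint8_t Type) {
    return static_cast<uint8_t>((Binding << 4) | (Type & 0xf));
  }
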
- Writer.writeSymbol(0, 0, 0, 0, 0, 0, false); - - for (unsigned i = 0, e = FileSymbolData.size(); i != e; ++i) { - Writer.writeSymbol(FileSymbolData[i], ELF::STT_FILE | ELF::STB_LOCAL, 0, 0, - ELF::STV_DEFAULT, ELF::SHN_ABS, true); - } - - // Write the symbol table entries. - LastLocalSymbolIndex = FileSymbolData.size() + LocalSymbolData.size() + 1; - - for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) { - ELFSymbolData &MSD = LocalSymbolData[i]; - WriteSymbol(Writer, MSD, Layout); - } - - for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) { - ELFSymbolData &MSD = ExternalSymbolData[i]; - MCSymbolData &Data = MSD.Symbol->getData(); - assert(((Data.getFlags() & ELF_STB_Global) || - (Data.getFlags() & ELF_STB_Weak)) && - "External symbol requires STB_GLOBAL or STB_WEAK flag"); - WriteSymbol(Writer, MSD, Layout); - if (MCELF::GetBinding(Data) == ELF::STB_LOCAL) - LastLocalSymbolIndex++; - } - - for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) { - ELFSymbolData &MSD = UndefinedSymbolData[i]; - MCSymbolData &Data = MSD.Symbol->getData(); - WriteSymbol(Writer, MSD, Layout); - if (MCELF::GetBinding(Data) == ELF::STB_LOCAL) - LastLocalSymbolIndex++; - } - - uint64_t SecEnd = OS.tell(); - SectionOffsets[SymtabSection] = std::make_pair(SecStart, SecEnd); - - ArrayRef<uint32_t> ShndxIndexes = Writer.getShndxIndexes(); - if (ShndxIndexes.empty()) { - assert(SymtabShndxSectionIndex == 0); - return; - } - assert(SymtabShndxSectionIndex != 0); - - SecStart = OS.tell(); - const MCSectionELF *SymtabShndxSection = - SectionTable[SymtabShndxSectionIndex - 1]; - for (uint32_t Index : ShndxIndexes) - write(Index); - SecEnd = OS.tell(); - SectionOffsets[SymtabShndxSection] = std::make_pair(SecStart, SecEnd); + Writer.writeSymbol(StringIndex, Info, Value, Size, Other, MSD.SectionIndex, + IsReserved); } // It is always valid to create a relocation with a symbol. It is preferable @@ -609,10 +496,9 @@ void ELFObjectWriter::writeSymbolTable(MCContext &Ctx, // allows us to omit some local symbols from the symbol table. bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, const MCSymbolRefExpr *RefA, - const MCSymbol *Sym, uint64_t C, + const MCSymbol *S, uint64_t C, unsigned Type) const { - MCSymbolData *SD = Sym ? &Sym->getData() : nullptr; - + const auto *Sym = cast_or_null<MCSymbolELF>(S); // A PCRel relocation to an absolute value has no symbol (or section). We // represent that with a relocation to a null section. 
if (!RefA) @@ -651,7 +537,7 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, if (Sym->isUndefined()) return true; - unsigned Binding = MCELF::GetBinding(*SD); + unsigned Binding = Sym->getBinding(); switch(Binding) { default: llvm_unreachable("Invalid Binding"); @@ -701,38 +587,19 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, if (Asm.isThumbFunc(Sym)) return true; - if (TargetObjectWriter->needsRelocateWithSymbol(*SD, Type)) + if (TargetObjectWriter->needsRelocateWithSymbol(*Sym, Type)) return true; return false; } -static const MCSymbol *getWeakRef(const MCSymbolRefExpr &Ref) { - const MCSymbol &Sym = Ref.getSymbol(); - - if (Ref.getKind() == MCSymbolRefExpr::VK_WEAKREF) - return &Sym; - - if (!Sym.isVariable()) - return nullptr; - - const MCExpr *Expr = Sym.getVariableValue(); - const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr); - if (!Inner) - return nullptr; - - if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) - return &Inner->getSymbol(); - return nullptr; -} - // True if the assembler knows nothing about the final value of the symbol. // This doesn't cover the comdat issues, since in those cases the assembler // can at least know that all symbols in the section will move together. -static bool isWeak(const MCSymbolData &D) { - if (MCELF::GetType(D) == ELF::STT_GNU_IFUNC) +static bool isWeak(const MCSymbolELF &Sym) { + if (Sym.getType() == ELF::STT_GNU_IFUNC) return true; - switch (MCELF::GetBinding(D)) { + switch (Sym.getBinding()) { default: llvm_unreachable("Unknown binding"); case ELF::STB_LOCAL: @@ -745,7 +612,7 @@ static bool isWeak(const MCSymbolData &D) { } } -void ELFObjectWriter::RecordRelocation(MCAssembler &Asm, +void ELFObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, @@ -770,7 +637,7 @@ void ELFObjectWriter::RecordRelocation(MCAssembler &Asm, Fixup.getLoc(), "No relocation available to represent this relative expression"); - const MCSymbol &SymB = RefB->getSymbol(); + const auto &SymB = cast<MCSymbolELF>(RefB->getSymbol()); if (SymB.isUndefined()) Asm.getContext().reportFatalError( @@ -784,7 +651,7 @@ void ELFObjectWriter::RecordRelocation(MCAssembler &Asm, Asm.getContext().reportFatalError( Fixup.getLoc(), "Cannot represent a difference across sections"); - if (::isWeak(SymB.getData())) + if (::isWeak(SymB)) Asm.getContext().reportFatalError( Fixup.getLoc(), "Cannot represent a subtraction with a weak symbol"); @@ -796,7 +663,18 @@ void ELFObjectWriter::RecordRelocation(MCAssembler &Asm, // We either rejected the fixup or folded B into C at this point. const MCSymbolRefExpr *RefA = Target.getSymA(); - const MCSymbol *SymA = RefA ? &RefA->getSymbol() : nullptr; + const auto *SymA = RefA ? cast<MCSymbolELF>(&RefA->getSymbol()) : nullptr; + + bool ViaWeakRef = false; + if (SymA && SymA->isVariable()) { + const MCExpr *Expr = SymA->getVariableValue(); + if (const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr)) { + if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) { + SymA = cast<MCSymbolELF>(&Inner->getSymbol()); + ViaWeakRef = true; + } + } + } unsigned Type = GetRelocType(Target, Fixup, IsPCRel); bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymA, C, Type); @@ -811,50 +689,36 @@ void ELFObjectWriter::RecordRelocation(MCAssembler &Asm, FixedValue = C; - // FIXME: What is this!?!? - MCSymbolRefExpr::VariantKind Modifier = - RefA ? 
RefA->getKind() : MCSymbolRefExpr::VK_None; - if (RelocNeedsGOT(Modifier)) - NeedsGOT = true; - if (!RelocateWithSymbol) { const MCSection *SecA = (SymA && !SymA->isUndefined()) ? &SymA->getSection() : nullptr; auto *ELFSec = cast_or_null<MCSectionELF>(SecA); - MCSymbol *SectionSymbol = - ELFSec ? Asm.getContext().getOrCreateSectionSymbol(*ELFSec) - : nullptr; + const auto *SectionSymbol = + ELFSec ? cast<MCSymbolELF>(ELFSec->getBeginSymbol()) : nullptr; + if (SectionSymbol) + SectionSymbol->setUsedInReloc(); ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend); Relocations[&FixupSection].push_back(Rec); return; } if (SymA) { - if (const MCSymbol *R = Renames.lookup(SymA)) + if (const MCSymbolELF *R = Renames.lookup(SymA)) SymA = R; - if (const MCSymbol *WeakRef = getWeakRef(*RefA)) - WeakrefUsedInReloc.insert(WeakRef); + if (ViaWeakRef) + SymA->setIsWeakrefUsedInReloc(); else - UsedInReloc.insert(SymA); + SymA->setUsedInReloc(); } ELFRelocationEntry Rec(FixupOffset, SymA, Type, Addend); Relocations[&FixupSection].push_back(Rec); return; } - -uint64_t -ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm, - const MCSymbol *S) { - assert(S->hasData()); - return S->getIndex(); -} - bool ELFObjectWriter::isInSymtab(const MCAsmLayout &Layout, - const MCSymbol &Symbol, bool Used, + const MCSymbolELF &Symbol, bool Used, bool Renamed) { - const MCSymbolData &Data = Symbol.getData(); if (Symbol.isVariable()) { const MCExpr *Expr = Symbol.getVariableValue(); if (const MCSymbolRefExpr *Ref = dyn_cast<MCSymbolRefExpr>(Expr)) {
@@ -869,34 +733,19 @@ bool ELFObjectWriter::isInSymtab(const MCAsmLayout &Layout, if (Renamed) return false; - if (Symbol.getName() == "_GLOBAL_OFFSET_TABLE_") - return true; - - if (Symbol.isVariable()) { - const MCSymbol *Base = Layout.getBaseSymbol(Symbol); - if (Base && Base->isUndefined()) - return false; + if (Symbol.isVariable() && Symbol.isUndefined()) { + // FIXME: this is here just to diagnose the case of a var = common_sym. + Layout.getBaseSymbol(Symbol); + return false; } - bool IsGlobal = MCELF::GetBinding(Data) == ELF::STB_GLOBAL; - if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal) + if (Symbol.isUndefined() && !Symbol.isBindingSet()) return false; if (Symbol.isTemporary()) return false; - return true; -} - -bool ELFObjectWriter::isLocal(const MCSymbol &Symbol, bool isUsedInReloc) { - const MCSymbolData &Data = Symbol.getData(); - if (Data.isExternal()) - return false; - - if (Symbol.isDefined()) - return true; - - if (isUsedInReloc) + if (Symbol.getType() == ELF::STT_SECTION) return false; return true;
@@ -904,9 +753,11 @@ bool ELFObjectWriter::isLocal(const MCSymbol &Symbol, bool isUsedInReloc) { void ELFObjectWriter::computeSymbolTable( MCAssembler &Asm, const MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap, - const RevGroupMapTy &RevGroupMap) { + const SectionIndexMapTy &SectionIndexMap, const RevGroupMapTy &RevGroupMap, + SectionOffsetsTy &SectionOffsets) { MCContext &Ctx = Asm.getContext(); + SymbolTableWriter Writer(*this, is64Bit()); + // Symbol table unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32; MCSectionELF *SymtabSection =
@@ -914,49 +765,37 @@ void ELFObjectWriter::computeSymbolTable( SymtabSection->setAlignment(is64Bit() ? 8 : 4); SymbolTableIndex = addToSectionTable(SymtabSection); - // FIXME: Is this the correct place to do this? - // FIXME: Why is an undefined reference to _GLOBAL_OFFSET_TABLE_ needed?
- if (NeedsGOT) { - StringRef Name = "_GLOBAL_OFFSET_TABLE_"; - MCSymbol *Sym = Asm.getContext().getOrCreateSymbol(Name); - MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym); - Data.setExternal(true); - MCELF::SetBinding(Data, ELF::STB_GLOBAL); - } + align(SymtabSection->getAlignment()); + uint64_t SecStart = OS.tell(); + + // The first entry is the undefined symbol entry. + Writer.writeSymbol(0, 0, 0, 0, 0, 0, false); + + std::vector<ELFSymbolData> LocalSymbolData; + std::vector<ELFSymbolData> ExternalSymbolData; // Add the data for the symbols. bool HasLargeSectionIndex = false; - for (const MCSymbol &Symbol : Asm.symbols()) { - MCSymbolData &SD = Symbol.getData(); - - bool Used = UsedInReloc.count(&Symbol); - bool WeakrefUsed = WeakrefUsedInReloc.count(&Symbol); - bool isSignature = RevGroupMap.count(&Symbol); + for (const MCSymbol &S : Asm.symbols()) { + const auto &Symbol = cast<MCSymbolELF>(S); + bool Used = Symbol.isUsedInReloc(); + bool WeakrefUsed = Symbol.isWeakrefUsedInReloc(); + bool isSignature = Symbol.isSignature(); if (!isInSymtab(Layout, Symbol, Used || WeakrefUsed || isSignature, Renames.count(&Symbol))) continue; ELFSymbolData MSD; - MSD.Symbol = &Symbol; - const MCSymbol *BaseSymbol = Layout.getBaseSymbol(Symbol); - - // Undefined symbols are global, but this is the first place we - // are able to set it. - bool Local = isLocal(Symbol, Used); - if (!Local && MCELF::GetBinding(SD) == ELF::STB_LOCAL) { - assert(BaseSymbol); - MCSymbolData &BaseData = Asm.getSymbolData(*BaseSymbol); - MCELF::SetBinding(SD, ELF::STB_GLOBAL); - MCELF::SetBinding(BaseData, ELF::STB_GLOBAL); - } + MSD.Symbol = cast<MCSymbolELF>(&Symbol); - if (!BaseSymbol) { + bool Local = Symbol.getBinding() == ELF::STB_LOCAL; + if (Symbol.isAbsolute()) { MSD.SectionIndex = ELF::SHN_ABS; - } else if (SD.isCommon()) { + } else if (Symbol.isCommon()) { assert(!Local); MSD.SectionIndex = ELF::SHN_COMMON; - } else if (BaseSymbol->isUndefined()) { + } else if (Symbol.isUndefined()) { if (isSignature && !Used) { MSD.SectionIndex = RevGroupMap.lookup(&Symbol); if (MSD.SectionIndex >= ELF::SHN_LORESERVE) @@ -964,11 +803,9 @@ void ELFObjectWriter::computeSymbolTable( } else { MSD.SectionIndex = ELF::SHN_UNDEF; } - if (!Used && WeakrefUsed) - MCELF::SetBinding(SD, ELF::STB_WEAK); } else { const MCSectionELF &Section = - static_cast<const MCSectionELF&>(BaseSymbol->getSection()); + static_cast<const MCSectionELF &>(Symbol.getSection()); MSD.SectionIndex = SectionIndexMap.lookup(&Section); assert(MSD.SectionIndex && "Invalid section index!"); if (MSD.SectionIndex >= ELF::SHN_LORESERVE) @@ -1015,12 +852,10 @@ void ELFObjectWriter::computeSymbolTable( } // Sections have their own string table - if (MCELF::GetType(SD) != ELF::STT_SECTION) + if (Symbol.getType() != ELF::STT_SECTION) MSD.Name = StrTabBuilder.add(Name); - if (MSD.SectionIndex == ELF::SHN_UNDEF) - UndefinedSymbolData.push_back(MSD); - else if (Local) + if (Local) LocalSymbolData.push_back(MSD); else ExternalSymbolData.push_back(MSD); @@ -1033,38 +868,60 @@ void ELFObjectWriter::computeSymbolTable( SymtabShndxSection->setAlignment(4); } - for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i) - StrTabBuilder.add(*i); + ArrayRef<std::string> FileNames = Asm.getFileNames(); + for (const std::string &Name : FileNames) + StrTabBuilder.add(Name); StrTabBuilder.finalize(StringTableBuilder::ELF); - for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i) - FileSymbolData.push_back(StrTabBuilder.getOffset(*i)); - - for 
(ELFSymbolData &MSD : LocalSymbolData) - MSD.StringIndex = MCELF::GetType(MSD.Symbol->getData()) == ELF::STT_SECTION - ? 0 - : StrTabBuilder.getOffset(MSD.Name); - for (ELFSymbolData &MSD : ExternalSymbolData) - MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name); - for (ELFSymbolData& MSD : UndefinedSymbolData) - MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name); + for (const std::string &Name : FileNames) + Writer.writeSymbol(StrTabBuilder.getOffset(Name), + ELF::STT_FILE | ELF::STB_LOCAL, 0, 0, ELF::STV_DEFAULT, + ELF::SHN_ABS, true); // Symbols are required to be in lexicographic order. array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end()); array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); - array_pod_sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); // Set the symbol indices. Local symbols must come before all other // symbols with non-local bindings. - unsigned Index = FileSymbolData.size() + 1; - for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) - LocalSymbolData[i].Symbol->setIndex(Index++); - - for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) - ExternalSymbolData[i].Symbol->setIndex(Index++); - for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) - UndefinedSymbolData[i].Symbol->setIndex(Index++); + unsigned Index = FileNames.size() + 1; + + for (ELFSymbolData &MSD : LocalSymbolData) { + unsigned StringIndex = MSD.Symbol->getType() == ELF::STT_SECTION + ? 0 + : StrTabBuilder.getOffset(MSD.Name); + MSD.Symbol->setIndex(Index++); + writeSymbol(Writer, StringIndex, MSD, Layout); + } + + // Write the symbol table entries. + LastLocalSymbolIndex = Index; + + for (ELFSymbolData &MSD : ExternalSymbolData) { + unsigned StringIndex = StrTabBuilder.getOffset(MSD.Name); + MSD.Symbol->setIndex(Index++); + writeSymbol(Writer, StringIndex, MSD, Layout); + assert(MSD.Symbol->getBinding() != ELF::STB_LOCAL); + } + + uint64_t SecEnd = OS.tell(); + SectionOffsets[SymtabSection] = std::make_pair(SecStart, SecEnd); + + ArrayRef<uint32_t> ShndxIndexes = Writer.getShndxIndexes(); + if (ShndxIndexes.empty()) { + assert(SymtabShndxSectionIndex == 0); + return; + } + assert(SymtabShndxSectionIndex != 0); + + SecStart = OS.tell(); + const MCSectionELF *SymtabShndxSection = + SectionTable[SymtabShndxSectionIndex - 1]; + for (uint32_t Index : ShndxIndexes) + write(Index); + SecEnd = OS.tell(); + SectionOffsets[SymtabShndxSection] = std::make_pair(SecStart, SecEnd); } MCSectionELF * @@ -1182,14 +1039,14 @@ void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint32_t Link, uint32_t Info, uint64_t Alignment, uint64_t EntrySize) { - Write32(Name); // sh_name: index into string table - Write32(Type); // sh_type + write32(Name); // sh_name: index into string table + write32(Type); // sh_type WriteWord(Flags); // sh_flags WriteWord(Address); // sh_addr WriteWord(Offset); // sh_offset WriteWord(Size); // sh_size - Write32(Link); // sh_link - Write32(Info); // sh_info + write32(Link); // sh_link + write32(Info); // sh_info WriteWord(Alignment); // sh_addralign WriteWord(EntrySize); // sh_entsize } @@ -1204,8 +1061,7 @@ void ELFObjectWriter::writeRelocations(const MCAssembler &Asm, for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { const ELFRelocationEntry &Entry = Relocs[e - i - 1]; - unsigned Index = - Entry.Symbol ? getSymbolIndexInSymbolTable(Asm, Entry.Symbol) : 0; + unsigned Index = Entry.Symbol ? 
Entry.Symbol->getIndex() : 0; if (is64Bit()) { write(Entry.Offset); @@ -1292,8 +1148,7 @@ void ELFObjectWriter::writeSection(const SectionIndexMapTy &SectionIndexMap, } void ELFObjectWriter::writeSectionHeader( - const MCAssembler &Asm, const MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap, + const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, const SectionOffsetsTy &SectionOffsets) { const unsigned NumSections = SectionTable.size(); @@ -1308,7 +1163,7 @@ void ELFObjectWriter::writeSectionHeader( if (Type != ELF::SHT_GROUP) GroupSymbolIndex = 0; else - GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm, Section->getGroup()); + GroupSymbolIndex = Section->getGroup()->getIndex(); const std::pair<uint64_t, uint64_t> &Offsets = SectionOffsets.find(Section)->second; @@ -1323,7 +1178,7 @@ void ELFObjectWriter::writeSectionHeader( } } -void ELFObjectWriter::WriteObject(MCAssembler &Asm, +void ELFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { MCContext &Ctx = Asm.getContext(); MCSectionELF *StrtabSection = @@ -1345,13 +1200,12 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, for (MCSection &Sec : Asm) { MCSectionELF &Section = static_cast<MCSectionELF &>(Sec); - uint64_t Padding = OffsetToAlignment(OS.tell(), Section.getAlignment()); - WriteZeros(Padding); + align(Section.getAlignment()); // Remember the offset into the file for this section. uint64_t SecStart = OS.tell(); - const MCSymbol *SignatureSymbol = Section.getGroup(); + const MCSymbolELF *SignatureSymbol = Section.getGroup(); writeSectionData(Asm, Section, Layout); uint64_t SecEnd = OS.tell(); @@ -1360,7 +1214,7 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, MCSectionELF *RelSection = createRelocationSection(Ctx, Section); if (SignatureSymbol) { - Asm.getOrCreateSymbolData(*SignatureSymbol); + Asm.registerSymbol(*SignatureSymbol); unsigned &GroupIdx = RevGroupMap[SignatureSymbol]; if (!GroupIdx) { MCSectionELF *Group = Ctx.createELFGroupSection(SignatureSymbol); @@ -1368,9 +1222,11 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, Group->setAlignment(4); Groups.push_back(Group); } - GroupMembers[SignatureSymbol].push_back(&Section); + std::vector<const MCSectionELF *> &Members = + GroupMembers[SignatureSymbol]; + Members.push_back(&Section); if (RelSection) - GroupMembers[SignatureSymbol].push_back(RelSection); + Members.push_back(RelSection); } SectionIndexMap[&Section] = addToSectionTable(&Section); @@ -1381,8 +1237,7 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, } for (MCSectionELF *Group : Groups) { - uint64_t Padding = OffsetToAlignment(OS.tell(), Group->getAlignment()); - WriteZeros(Padding); + align(Group->getAlignment()); // Remember the offset into the file for this section. uint64_t SecStart = OS.tell(); @@ -1400,11 +1255,10 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, } // Compute symbol table information. - computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap); + computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap, SectionOffsets); for (MCSectionELF *RelSection : Relocations) { - uint64_t Padding = OffsetToAlignment(OS.tell(), RelSection->getAlignment()); - WriteZeros(Padding); + align(RelSection->getAlignment()); // Remember the offset into the file for this section. 
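The repeated OffsetToAlignment/WriteZeros pairs in writeObject collapse into the new align() helper. The padding computation it relies on, in isolation (Alignment assumed nonzero, as section alignments are):

  #include <cstdint>

  // Bytes of zero padding so that (Offset + padding) % Alignment == 0;
  // returns 0 when Offset is already aligned.
  static uint64_t paddingTo(uint64_t Offset, uint64_t Alignment) {
    return (Alignment - (Offset % Alignment)) % Alignment;
  }
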
uint64_t SecStart = OS.tell(); @@ -1415,8 +1269,6 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, SectionOffsets[RelSection] = std::make_pair(SecStart, SecEnd); } - writeSymbolTable(Ctx, Layout, SectionOffsets); - { uint64_t SecStart = OS.tell(); const MCSectionELF *Sec = createStringTable(Ctx); @@ -1425,13 +1277,12 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, } uint64_t NaturalAlignment = is64Bit() ? 8 : 4; - uint64_t Padding = OffsetToAlignment(OS.tell(), NaturalAlignment); - WriteZeros(Padding); + align(NaturalAlignment); const unsigned SectionHeaderOffset = OS.tell(); // ... then the section header table ... - writeSectionHeader(Asm, Layout, SectionIndexMap, SectionOffsets); + writeSectionHeader(Layout, SectionIndexMap, SectionOffsets); uint16_t NumSections = (SectionTable.size() + 1 >= ELF::SHN_LORESERVE) ? (uint16_t)ELF::SHN_UNDEF @@ -1459,21 +1310,22 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, NumSectionsOffset); } -bool ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( - const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, +bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( + const MCAssembler &Asm, const MCSymbol &SA, const MCFragment &FB, bool InSet, bool IsPCRel) const { + const auto &SymA = cast<MCSymbolELF>(SA); if (IsPCRel) { assert(!InSet); - if (::isWeak(SymA.getData())) + if (::isWeak(SymA)) return false; } - return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB, + return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB, InSet, IsPCRel); } -bool ELFObjectWriter::isWeak(const MCSymbol &Sym) const { - const MCSymbolData &SD = Sym.getData(); - if (::isWeak(SD)) +bool ELFObjectWriter::isWeak(const MCSymbol &S) const { + const auto &Sym = cast<MCSymbolELF>(S); + if (::isWeak(Sym)) return true; // It is invalid to replace a reference to a global in a comdat @@ -1482,7 +1334,7 @@ bool ELFObjectWriter::isWeak(const MCSymbol &Sym) const { // We could try to return false for more cases, like the reference // being in the same comdat or Sym being an alias to another global, // but it is not clear if it is worth the effort. 
- if (MCELF::GetBinding(SD) != ELF::STB_GLOBAL) + if (Sym.getBinding() != ELF::STB_GLOBAL) return false; if (!Sym.isInSection()) diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp index c42757b70c8b..36c65b7bcd49 100644 --- a/lib/MC/MCAsmBackend.cpp +++ b/lib/MC/MCAsmBackend.cpp @@ -16,27 +16,33 @@ MCAsmBackend::MCAsmBackend() : HasDataInCodeSupport(false) {} MCAsmBackend::~MCAsmBackend() {} -const MCFixupKindInfo & -MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { +const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { static const MCFixupKindInfo Builtins[] = { - { "FK_Data_1", 0, 8, 0 }, - { "FK_Data_2", 0, 16, 0 }, - { "FK_Data_4", 0, 32, 0 }, - { "FK_Data_8", 0, 64, 0 }, - { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel }, - { "FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, - { "FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel }, - { "FK_GPRel_1", 0, 8, 0 }, - { "FK_GPRel_2", 0, 16, 0 }, - { "FK_GPRel_4", 0, 32, 0 }, - { "FK_GPRel_8", 0, 64, 0 }, - { "FK_SecRel_1", 0, 8, 0 }, - { "FK_SecRel_2", 0, 16, 0 }, - { "FK_SecRel_4", 0, 32, 0 }, - { "FK_SecRel_8", 0, 64, 0 } - }; + {"FK_Data_1", 0, 8, 0}, + {"FK_Data_2", 0, 16, 0}, + {"FK_Data_4", 0, 32, 0}, + {"FK_Data_8", 0, 64, 0}, + {"FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel}, + {"FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel}, + {"FK_GPRel_1", 0, 8, 0}, + {"FK_GPRel_2", 0, 16, 0}, + {"FK_GPRel_4", 0, 32, 0}, + {"FK_GPRel_8", 0, 64, 0}, + {"FK_SecRel_1", 0, 8, 0}, + {"FK_SecRel_2", 0, 16, 0}, + {"FK_SecRel_4", 0, 32, 0}, + {"FK_SecRel_8", 0, 64, 0}}; assert((size_t)Kind <= array_lengthof(Builtins) && "Unknown fixup kind"); return Builtins[Kind]; } + +bool MCAsmBackend::fixupNeedsRelaxationAdvanced( + const MCFixup &Fixup, bool Resolved, uint64_t Value, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { + if (!Resolved) + return true; + return fixupNeedsRelaxation(Fixup, Value, DF, Layout); +} diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index b61f5b1cdf70..100dc7c3dc60 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -50,6 +50,7 @@ MCAsmInfo::MCAsmInfo() { Code64Directive = ".code64"; AssemblerDialect = 0; AllowAtInName = false; + SupportsQuotedNames = true; UseDataRegionDirectives = false; ZeroDirective = "\t.zero\t"; AsciiDirective = "\t.ascii\t"; @@ -128,12 +129,31 @@ MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const { if (!(Encoding & dwarf::DW_EH_PE_pcrel)) - return MCSymbolRefExpr::Create(Sym, Streamer.getContext()); + return MCSymbolRefExpr::create(Sym, Streamer.getContext()); MCContext &Context = Streamer.getContext(); - const MCExpr *Res = MCSymbolRefExpr::Create(Sym, Context); + const MCExpr *Res = MCSymbolRefExpr::create(Sym, Context); MCSymbol *PCSym = Context.createTempSymbol(); Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context); - return MCBinaryExpr::CreateSub(Res, PC, Context); + const MCExpr *PC = MCSymbolRefExpr::create(PCSym, Context); + return MCBinaryExpr::createSub(Res, PC, Context); +} + +static bool isAcceptableChar(char C) { + return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || + (C >= '0' && C <= '9') || C == '_' || C == '$' || C == '.' 
|| C == '@'; +} + +bool MCAsmInfo::isValidUnquotedName(StringRef Name) const { + if (Name.empty()) + return false; + + // If any of the characters in the string is an unacceptable character, force + // quotes. + for (char C : Name) { + if (!isAcceptableChar(C)) + return false; + } + + return true; } diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index cabe63b029f5..0f405ad1193e 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -24,7 +24,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" @@ -136,9 +136,10 @@ public: void EmitCOFFSymbolStorageClass(int StorageClass) override; void EmitCOFFSymbolType(int Type) override; void EndCOFFSymbolDef() override; + void EmitCOFFSafeSEH(MCSymbol const *Symbol) override; void EmitCOFFSectionIndex(MCSymbol const *Symbol) override; void EmitCOFFSecRel32(MCSymbol const *Symbol) override; - void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) override; + void emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) override; void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; @@ -307,7 +308,9 @@ void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); MCStreamer::EmitLabel(Symbol); - OS << *Symbol << MAI->getLabelSuffix(); + Symbol->print(OS, MAI); + OS << MAI->getLabelSuffix(); + EmitEOL(); } @@ -327,7 +330,7 @@ void MCAsmStreamer::EmitLOHDirective(MCLOHType Kind, const MCLOHArgs &Args) { if (!IsFirst) OS << ", "; IsFirst = false; - OS << **It; + (*It)->print(OS, MAI); } EmitEOL(); } @@ -383,20 +386,28 @@ void MCAsmStreamer::EmitThumbFunc(MCSymbol *Func) { // MCSymbols when they have spaces in them. OS << "\t.thumb_func"; // Only Mach-O hasSubsectionsViaSymbols() - if (MAI->hasSubsectionsViaSymbols()) - OS << '\t' << *Func; + if (MAI->hasSubsectionsViaSymbols()) { + OS << '\t'; + Func->print(OS, MAI); + } EmitEOL(); } void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { - OS << *Symbol << " = " << *Value; + Symbol->print(OS, MAI); + OS << " = "; + Value->print(OS, MAI); + EmitEOL(); MCStreamer::EmitAssignment(Symbol, Value); } void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { - OS << ".weakref " << *Alias << ", " << *Symbol; + OS << ".weakref "; + Alias->print(OS, MAI); + OS << ", "; + Symbol->print(OS, MAI); EmitEOL(); } @@ -413,8 +424,9 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_ELF_TypeGnuUniqueObject: /// .type _foo, @gnu_unique_object if (!MAI->hasDotTypeDotSizeDirective()) return false; // Symbol attribute not supported - OS << "\t.type\t" << *Symbol << ',' - << ((MAI->getCommentString()[0] != '@') ? '@' : '%'); + OS << "\t.type\t"; + Symbol->print(OS, MAI); + OS << ',' << ((MAI->getCommentString()[0] != '@') ? 
'@' : '%'); switch (Attribute) { default: return false; case MCSA_ELF_TypeFunction: OS << "function"; break; @@ -455,19 +467,23 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break; } - OS << *Symbol; + Symbol->print(OS, MAI); EmitEOL(); return true; } void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { - OS << ".desc" << ' ' << *Symbol << ',' << DescValue; + OS << ".desc" << ' '; + Symbol->print(OS, MAI); + OS << ',' << DescValue; EmitEOL(); } void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { - OS << "\t.def\t " << *Symbol << ';'; + OS << "\t.def\t "; + Symbol->print(OS, MAI); + OS << ';'; EmitEOL(); } @@ -486,19 +502,30 @@ void MCAsmStreamer::EndCOFFSymbolDef() { EmitEOL(); } +void MCAsmStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { + OS << "\t.safeseh\t" << *Symbol; + EmitEOL(); +} + void MCAsmStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { - OS << "\t.secidx\t" << *Symbol; + OS << "\t.secidx\t"; + Symbol->print(OS, MAI); EmitEOL(); } void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) { - OS << "\t.secrel32\t" << *Symbol; + OS << "\t.secrel32\t"; + Symbol->print(OS, MAI); EmitEOL(); } -void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { +void MCAsmStreamer::emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) { assert(MAI->hasDotTypeDotSizeDirective()); - OS << "\t.size\t" << *Symbol << ", " << *Value << '\n'; + OS << "\t.size\t"; + Symbol->print(OS, MAI); + OS << ", "; + Value->print(OS, MAI); + OS << '\n'; } void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, @@ -506,7 +533,10 @@ void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, // Common symbols do not belong to any actual section. AssignSection(Symbol, nullptr); - OS << "\t.comm\t" << *Symbol << ',' << Size; + OS << "\t.comm\t"; + Symbol->print(OS, MAI); + OS << ',' << Size; + if (ByteAlignment != 0) { if (MAI->getCOMMDirectiveAlignmentIsInBytes()) OS << ',' << ByteAlignment; @@ -525,7 +555,10 @@ void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, // Common symbols do not belong to any actual section. AssignSection(Symbol, nullptr); - OS << "\t.lcomm\t" << *Symbol << ',' << Size; + OS << "\t.lcomm\t"; + Symbol->print(OS, MAI); + OS << ',' << Size; + if (ByteAlign > 1) { switch (MAI->getLCOMMDirectiveAlignmentType()) { case LCOMM::NoAlignment: @@ -555,7 +588,9 @@ void MCAsmStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, OS << MOSection->getSegmentName() << "," << MOSection->getSectionName(); if (Symbol) { - OS << ',' << *Symbol << ',' << Size; + OS << ','; + Symbol->print(OS, MAI); + OS << ',' << Size; if (ByteAlignment != 0) OS << ',' << Log2_32(ByteAlignment); } @@ -572,7 +607,9 @@ void MCAsmStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, assert(Symbol && "Symbol shouldn't be NULL!"); // Instead of using the Section we'll just use the shortcut. // This is a mach-o specific directive and section. - OS << ".tbss " << *Symbol << ", " << Size; + OS << ".tbss "; + Symbol->print(OS, MAI); + OS << ", " << Size; // Output align if we have it. We default to 1 so don't bother printing // that. 
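The new SupportsQuotedNames flag and the systematic change from OS << *Symbol to Symbol->print(OS, MAI) serve one feature: symbol names containing characters outside the assembler's identifier alphabet are printed in quotes. A standalone sketch of the decision, mirroring the isAcceptableChar/isValidUnquotedName logic added above:

```cpp
#include <iostream>
#include <string>

static bool isAcceptableChar(char C) {
  return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
         (C >= '0' && C <= '9') || C == '_' || C == '$' || C == '.' ||
         C == '@';
}

static bool isValidUnquotedName(const std::string &Name) {
  if (Name.empty())
    return false;
  // Any unacceptable character forces quoting.
  for (char C : Name)
    if (!isAcceptableChar(C))
      return false;
  return true;
}

int main() {
  for (const std::string N : {"foo", "a b", "vec.add$1"})
    std::cout << (isValidUnquotedName(N) ? N : '"' + N + '"') << "\n";
}
```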
@@ -643,7 +680,7 @@ void MCAsmStreamer::EmitBytes(StringRef Data) { } void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size) { - EmitValue(MCConstantExpr::Create(Value, getContext()), Size); + EmitValue(MCConstantExpr::create(Value, getContext()), Size); } void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, @@ -662,7 +699,7 @@ void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, if (!Directive) { int64_t IntValue; - if (!Value->EvaluateAsAbsolute(IntValue)) + if (!Value->evaluateAsAbsolute(IntValue)) report_fatal_error("Don't know how to emit this value."); // We couldn't handle the requested integer size so we fallback by breaking @@ -697,39 +734,44 @@ void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, } assert(Directive && "Invalid size for machine code value!"); - OS << Directive << *Value; + OS << Directive; + Value->print(OS, MAI); EmitEOL(); } void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value) { int64_t IntValue; - if (Value->EvaluateAsAbsolute(IntValue)) { + if (Value->evaluateAsAbsolute(IntValue)) { EmitULEB128IntValue(IntValue); return; } - OS << ".uleb128 " << *Value; + OS << ".uleb128 "; + Value->print(OS, MAI); EmitEOL(); } void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) { int64_t IntValue; - if (Value->EvaluateAsAbsolute(IntValue)) { + if (Value->evaluateAsAbsolute(IntValue)) { EmitSLEB128IntValue(IntValue); return; } - OS << ".sleb128 " << *Value; + OS << ".sleb128 "; + Value->print(OS, MAI); EmitEOL(); } void MCAsmStreamer::EmitGPRel64Value(const MCExpr *Value) { assert(MAI->getGPRel64Directive() != nullptr); - OS << MAI->getGPRel64Directive() << *Value; + OS << MAI->getGPRel64Directive(); + Value->print(OS, MAI); EmitEOL(); } void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { assert(MAI->getGPRel32Directive() != nullptr); - OS << MAI->getGPRel32Directive() << *Value; + OS << MAI->getGPRel32Directive(); + Value->print(OS, MAI); EmitEOL(); } @@ -816,7 +858,9 @@ void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, bool MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { // FIXME: Verify that Offset is associated with the current section. 
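EmitULEB128Value and EmitSLEB128Value above first try evaluateAsAbsolute: only a value that folds to a known constant can be emitted as encoded bytes; otherwise the streamer falls back to a .uleb128/.sleb128 directive for the assembler to resolve later. For reference, a standalone encoder for the unsigned case; LEB128 is the standard DWARF variable-length encoding, and this sketch is not LLVM's implementation:

```cpp
#include <cstdint>
#include <vector>

// Unsigned LEB128: 7 value bits per byte, high bit set on every
// byte except the last.
std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  return Out;
}

int main() {
  auto Bytes = encodeULEB128(624485); // 0xe5 0x8e 0x26
  return Bytes.size() == 3 ? 0 : 1;
}
```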
- OS << ".org " << *Offset << ", " << (unsigned) Value; + OS << ".org "; + Offset->print(OS, MAI); + OS << ", " << (unsigned)Value; EmitEOL(); return false; } @@ -987,13 +1031,15 @@ void MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { void MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding) { MCStreamer::EmitCFIPersonality(Sym, Encoding); - OS << "\t.cfi_personality " << Encoding << ", " << *Sym; + OS << "\t.cfi_personality " << Encoding << ", "; + Sym->print(OS, MAI); EmitEOL(); } void MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { MCStreamer::EmitCFILsda(Sym, Encoding); - OS << "\t.cfi_lsda " << Encoding << ", " << *Sym; + OS << "\t.cfi_lsda " << Encoding << ", "; + Sym->print(OS, MAI); EmitEOL(); } @@ -1057,7 +1103,8 @@ void MCAsmStreamer::EmitCFIWindowSave() { void MCAsmStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) { MCStreamer::EmitWinCFIStartProc(Symbol); - OS << ".seh_proc " << *Symbol; + OS << ".seh_proc "; + Symbol->print(OS, MAI); EmitEOL(); } @@ -1086,7 +1133,8 @@ void MCAsmStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except) { MCStreamer::EmitWinEHHandler(Sym, Unwind, Except); - OS << "\t.seh_handler " << *Sym; + OS << "\t.seh_handler "; + Sym->print(OS, MAI); if (Unwind) OS << ", @unwind"; if (Except) @@ -1102,9 +1150,9 @@ void MCAsmStreamer::EmitWinEHHandlerData() { // We only do this so the section switch that terminates the handler // data block is visible. WinEH::FrameInfo *CurFrame = getCurrentWinFrameInfo(); - if (MCSection *XData = WinEH::UnwindEmitter::getXDataSection( - CurFrame->Function, getContext())) - SwitchSectionNoChange(XData); + MCSection *XData = + WinEH::UnwindEmitter::getXDataSection(CurFrame->Function, getContext()); + SwitchSectionNoChange(XData); OS << "\t.seh_handlerdata"; EmitEOL(); diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 868b0f1228af..55f50097744d 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -120,14 +120,13 @@ uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const { // Simple getSymbolOffset helper for the non-varibale case. static bool getLabelOffset(const MCAsmLayout &Layout, const MCSymbol &S, bool ReportError, uint64_t &Val) { - const MCSymbolData &SD = S.getData(); - if (!SD.getFragment()) { + if (!S.getFragment()) { if (ReportError) report_fatal_error("unable to evaluate offset to undefined symbol '" + S.getName() + "'"); return false; } - Val = Layout.getFragmentOffset(SD.getFragment()) + SD.getOffset(); + Val = Layout.getFragmentOffset(S.getFragment()) + S.getOffset(); return true; } @@ -138,7 +137,7 @@ static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S, // If SD is a variable, evaluate it. MCValue Target; - if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout, nullptr)) + if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr)) report_fatal_error("unable to evaluate offset for variable '" + S.getName() + "'"); @@ -195,8 +194,7 @@ const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const { const MCSymbol &ASym = A->getSymbol(); const MCAssembler &Asm = getAssembler(); - const MCSymbolData &ASD = Asm.getSymbolData(ASym); - if (ASD.isCommon()) { + if (ASym.isCommon()) { // FIXME: we should probably add a SMLoc to MCExpr. 
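getLabelOffset above now reads the fragment and the offset directly off the MCSymbol rather than through the removed MCSymbolData, but the address computation is unchanged: a label's section-relative offset is its fragment's layout offset plus the symbol's offset within that fragment. A toy model of the two-level lookup, with illustrative types:

```cpp
#include <cstdint>
#include <stdexcept>

struct Fragment { uint64_t OffsetInSection; };

struct Symbol {
  const Fragment *Frag = nullptr; // null for undefined symbols
  uint64_t OffsetInFragment = 0;
};

uint64_t getLabelOffset(const Symbol &S) {
  if (!S.Frag)
    throw std::runtime_error("offset to undefined symbol");
  // Section-relative address: fragment start plus offset within it.
  return S.Frag->OffsetInSection + S.OffsetInFragment;
}

int main() {
  Fragment F{0x40};
  Symbol S{&F, 0x10};
  return getLabelOffset(S) == 0x50 ? 0 : 1;
}
```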
Asm.getContext().reportFatalError(SMLoc(), "Common symbol " + ASym.getName() + @@ -378,17 +376,17 @@ const MCSymbol *MCAssembler::getAtom(const MCSymbol &S) const { return &S; // Absolute and undefined symbols have no defining atom. - if (!S.getData().getFragment()) + if (!S.getFragment()) return nullptr; // Non-linker visible symbols in sections which can't be atomized have no // defining atom. if (!getContext().getAsmInfo()->isSectionAtomizableBySymbols( - *S.getData().getFragment()->getParent())) + *S.getFragment()->getParent())) return nullptr; // Otherwise, return the atom for the containing fragment. - return S.getData().getFragment()->getAtom(); + return S.getFragment()->getAtom(); } bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, @@ -396,11 +394,11 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, MCValue &Target, uint64_t &Value) const { ++stats::evaluateFixup; - // FIXME: This code has some duplication with RecordRelocation. We should + // FIXME: This code has some duplication with recordRelocation. We should // probably merge the two into a single callback that tries to evaluate a // fixup and records a relocation if one is needed. const MCExpr *Expr = Fixup.getValue(); - if (!Expr->EvaluateAsRelocatable(Target, &Layout, &Fixup)) + if (!Expr->evaluateAsRelocatable(Target, &Layout, &Fixup)) getContext().reportFatalError(Fixup.getLoc(), "expected relocatable expression"); bool IsPCRel = Backend.getFixupKindInfo( @@ -418,7 +416,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, if (A->getKind() != MCSymbolRefExpr::VK_None || SA.isUndefined()) { IsResolved = false; } else { - IsResolved = getWriter().IsSymbolRefDifferenceFullyResolvedImpl( + IsResolved = getWriter().isSymbolRefDifferenceFullyResolvedImpl( *this, SA, *DF, false, true); } } @@ -475,6 +473,9 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_LEB: return cast<MCLEBFragment>(F).getContents().size(); + case MCFragment::FT_SafeSEH: + return 4; + case MCFragment::FT_Align: { const MCAlignFragment &AF = cast<MCAlignFragment>(F); unsigned Offset = Layout.getFragmentOffset(&AF); @@ -493,7 +494,7 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_Org: { const MCOrgFragment &OF = cast<MCOrgFragment>(F); int64_t TargetLocation; - if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, Layout)) + if (!OF.getOffset().evaluateAsAbsolute(TargetLocation, Layout)) report_fatal_error("expected assembly-time absolute expression"); // FIXME: We need a way to communicate this error. @@ -575,7 +576,17 @@ void MCAsmLayout::layoutFragment(MCFragment *F) { /// a MCEncodedFragment. 
static void writeFragmentContents(const MCFragment &F, MCObjectWriter *OW) { const MCEncodedFragment &EF = cast<MCEncodedFragment>(F); - OW->WriteBytes(EF.getContents()); + OW->writeBytes(EF.getContents()); +} + +void MCAssembler::registerSymbol(const MCSymbol &Symbol, bool *Created) { + bool New = !Symbol.isRegistered(); + if (Created) + *Created = New; + if (New) { + Symbol.setIsRegistered(true); + Symbols.push_back(&Symbol); + } } void MCAssembler::writeFragmentPadding(const MCFragment &F, uint64_t FSize, @@ -659,10 +670,10 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, for (uint64_t i = 0; i != Count; ++i) { switch (AF.getValueSize()) { default: llvm_unreachable("Invalid size!"); - case 1: OW->Write8 (uint8_t (AF.getValue())); break; - case 2: OW->Write16(uint16_t(AF.getValue())); break; - case 4: OW->Write32(uint32_t(AF.getValue())); break; - case 8: OW->Write64(uint64_t(AF.getValue())); break; + case 1: OW->write8 (uint8_t (AF.getValue())); break; + case 2: OW->write16(uint16_t(AF.getValue())); break; + case 4: OW->write32(uint32_t(AF.getValue())); break; + case 8: OW->write64(uint64_t(AF.getValue())); break; } } break; @@ -692,10 +703,10 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) { switch (FF.getValueSize()) { default: llvm_unreachable("Invalid size!"); - case 1: OW->Write8 (uint8_t (FF.getValue())); break; - case 2: OW->Write16(uint16_t(FF.getValue())); break; - case 4: OW->Write32(uint32_t(FF.getValue())); break; - case 8: OW->Write64(uint64_t(FF.getValue())); break; + case 1: OW->write8 (uint8_t (FF.getValue())); break; + case 2: OW->write16(uint16_t(FF.getValue())); break; + case 4: OW->write32(uint32_t(FF.getValue())); break; + case 8: OW->write64(uint64_t(FF.getValue())); break; } } break; @@ -703,7 +714,13 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, case MCFragment::FT_LEB: { const MCLEBFragment &LF = cast<MCLEBFragment>(F); - OW->WriteBytes(LF.getContents()); + OW->writeBytes(LF.getContents()); + break; + } + + case MCFragment::FT_SafeSEH: { + const MCSafeSEHFragment &SF = cast<MCSafeSEHFragment>(F); + OW->write32(SF.getSymbol()->getIndex()); break; } @@ -712,19 +729,19 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCOrgFragment &OF = cast<MCOrgFragment>(F); for (uint64_t i = 0, e = FragmentSize; i != e; ++i) - OW->Write8(uint8_t(OF.getValue())); + OW->write8(uint8_t(OF.getValue())); break; } case MCFragment::FT_Dwarf: { const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F); - OW->WriteBytes(OF.getContents()); + OW->writeBytes(OF.getContents()); break; } case MCFragment::FT_DwarfFrame: { const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F); - OW->WriteBytes(CF.getContents()); + OW->writeBytes(CF.getContents()); break; } } @@ -802,7 +819,7 @@ std::pair<uint64_t, bool> MCAssembler::handleFixup(const MCAsmLayout &Layout, // The fixup was unresolved, we need a relocation. Inform the object // writer of the relocation, and give it an opportunity to adjust the // fixup value if need be. 
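The new MCAssembler::registerSymbol above replaces getOrCreateSymbolData: instead of allocating a side-table record, it flips an is-registered bit on the symbol and appends the symbol to the assembler's list at most once. The same idiom in isolation, with simplified types:

```cpp
#include <vector>

struct Symbol { bool Registered = false; };

struct Assembler {
  std::vector<Symbol *> Symbols;

  // Idempotent: the symbol is appended only on first registration.
  void registerSymbol(Symbol &Sym, bool *Created = nullptr) {
    bool New = !Sym.Registered;
    if (Created)
      *Created = New;
    if (New) {
      Sym.Registered = true;
      Symbols.push_back(&Sym);
    }
  }
};

int main() {
  Assembler Asm;
  Symbol S;
  bool Created;
  Asm.registerSymbol(S, &Created); // Created == true
  Asm.registerSymbol(S, &Created); // Created == false, list unchanged
  return Asm.Symbols.size() == 1 ? 0 : 1;
}
```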
- getWriter().RecordRelocation(*this, Layout, &F, Fixup, Target, IsPCRel, + getWriter().recordRelocation(*this, Layout, &F, Fixup, Target, IsPCRel, FixedValue); } return std::make_pair(FixedValue, IsPCRel); @@ -857,7 +874,7 @@ void MCAssembler::Finish() { // Allow the object writer a chance to perform post-layout binding (for // example, to set the index fields in the symbol data). - getWriter().ExecutePostLayoutBinding(*this, Layout); + getWriter().executePostLayoutBinding(*this, Layout); // Evaluate and apply the fixups, generating relocation entries as necessary. for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { @@ -880,7 +897,7 @@ void MCAssembler::Finish() { } // Write the object file. - getWriter().WriteObject(*this, Layout); + getWriter().writeObject(*this, Layout); stats::ObjectBytes += OS.tell() - StartOffset; } @@ -888,13 +905,11 @@ void MCAssembler::Finish() { bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { - // If we cannot resolve the fixup value, it requires relaxation. MCValue Target; uint64_t Value; - if (!evaluateFixup(Layout, Fixup, DF, Target, Value)) - return true; - - return getBackend().fixupNeedsRelaxation(Fixup, Value, DF, Layout); + bool Resolved = evaluateFixup(Layout, Fixup, DF, Target, Value); + return getBackend().fixupNeedsRelaxationAdvanced(Fixup, Resolved, Value, DF, + Layout); } bool MCAssembler::fragmentNeedsRelaxation(const MCRelaxableFragment *F, @@ -1088,6 +1103,7 @@ void MCFragment::dump() { case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break; case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break; case MCFragment::FT_LEB: OS << "MCLEBFragment"; break; + case MCFragment::FT_SafeSEH: OS << "MCSafeSEHFragment"; break; } OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder @@ -1180,25 +1196,13 @@ void MCFragment::dump() { OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned(); break; } + case MCFragment::FT_SafeSEH: { + const MCSafeSEHFragment *F = cast<MCSafeSEHFragment>(this); + OS << "\n "; + OS << " Sym:" << F->getSymbol(); + break; + } } - OS << ">"; -} - -void MCSymbolData::dump() const { - raw_ostream &OS = llvm::errs(); - - OS << "<MCSymbolData" - << " Fragment:" << getFragment(); - if (!isCommon()) - OS << " Offset:" << getOffset(); - OS << " Flags:" << getFlags(); - if (isCommon()) - OS << " (common, size:" << getCommonSize() - << " align: " << getCommonAlignment() << ")"; - if (isExternal()) - OS << " (external)"; - if (isPrivateExtern()) - OS << " (private extern)"; OS << ">"; } @@ -1219,7 +1223,6 @@ void MCAssembler::dump() { OS << "("; it->dump(); OS << ", Index:" << it->getIndex() << ", "; - it->getData().dump(); OS << ")"; } OS << "]>\n"; @@ -1236,5 +1239,6 @@ void MCAlignFragment::anchor() { } void MCFillFragment::anchor() { } void MCOrgFragment::anchor() { } void MCLEBFragment::anchor() { } +void MCSafeSEHFragment::anchor() { } void MCDwarfLineAddrFragment::anchor() { } void MCDwarfCallFrameFragment::anchor() { } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 1f2f034a3acb..1e52eedaf188 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -20,7 +20,9 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolCOFF.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSymbolMachO.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" 
#include "llvm/Support/FileSystem.h" @@ -114,13 +116,13 @@ MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) { MCSymbol *&Sym = Symbols[NameRef]; if (!Sym) - Sym = CreateSymbol(NameRef, false); + Sym = createSymbol(NameRef, false, false); return Sym; } -MCSymbol *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) { - MCSymbol *&Sym = SectionSymbols[&Section]; +MCSymbolELF *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) { + MCSymbolELF *&Sym = SectionSymbols[&Section]; if (Sym) return Sym; @@ -128,12 +130,12 @@ MCSymbol *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) { MCSymbol *&OldSym = Symbols[Name]; if (OldSym && OldSym->isUndefined()) { - Sym = OldSym; - return OldSym; + Sym = cast<MCSymbolELF>(OldSym); + return Sym; } auto NameIter = UsedNames.insert(std::make_pair(Name, true)).first; - Sym = new (*this) MCSymbol(&*NameIter, /*isTemporary*/ false); + Sym = new (*this) MCSymbolELF(&*NameIter, /*isTemporary*/ false); if (!OldSym) OldSym = Sym; @@ -157,15 +159,32 @@ MCSymbol *MCContext::getOrCreateLSDASymbol(StringRef FuncName) { FuncName); } -MCSymbol *MCContext::CreateSymbol(StringRef Name, bool AlwaysAddSuffix) { - // Determine whether this is an assembler temporary or normal label, if used. - bool IsTemporary = false; +MCSymbol *MCContext::createSymbolImpl(const StringMapEntry<bool> *Name, + bool IsTemporary) { + if (MOFI) { + switch (MOFI->getObjectFileType()) { + case MCObjectFileInfo::IsCOFF: + return new (*this) MCSymbolCOFF(Name, IsTemporary); + case MCObjectFileInfo::IsELF: + return new (*this) MCSymbolELF(Name, IsTemporary); + case MCObjectFileInfo::IsMachO: + return new (*this) MCSymbolMachO(Name, IsTemporary); + } + } + return new (*this) MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary); +} + +MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix, + bool IsTemporary) { + if (IsTemporary && !UseNamesOnTempLabels) + return createSymbolImpl(nullptr, true); + + // Determine whether this is an user writter assembler temporary or normal + // label, if used. + IsTemporary = false; if (AllowTemporaryLabels) IsTemporary = Name.startswith(MAI->getPrivateGlobalPrefix()); - if (IsTemporary && AlwaysAddSuffix && !UseNamesOnTempLabels) - return new (*this) MCSymbol(nullptr, true); - SmallString<128> NewName = Name; bool AddSuffix = AlwaysAddSuffix; unsigned &NextUniqueID = NextID[Name]; @@ -178,8 +197,7 @@ MCSymbol *MCContext::CreateSymbol(StringRef Name, bool AlwaysAddSuffix) { if (NameEntry.second) { // Ok, we found a name. Have the MCSymbol object itself refer to the copy // of the string that is embedded in the UsedNames entry. 
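createSymbolImpl above picks a concrete symbol subclass (MCSymbolCOFF, MCSymbolELF, or MCSymbolMachO) based on the object-file format, which is what lets format-specific state such as ELF binding and type live on the symbol itself rather than in MCSymbolData flags. The shape of that factory, reduced to a standalone sketch:

```cpp
#include <memory>

enum class ObjectFormat { COFF, ELF, MachO };

struct Symbol { virtual ~Symbol() = default; };
struct SymbolCOFF : Symbol {};
struct SymbolELF : Symbol {};
struct SymbolMachO : Symbol {};

// One subclass per object-file format, chosen at creation time.
std::unique_ptr<Symbol> createSymbol(ObjectFormat F) {
  switch (F) {
  case ObjectFormat::COFF:  return std::make_unique<SymbolCOFF>();
  case ObjectFormat::ELF:   return std::make_unique<SymbolELF>();
  case ObjectFormat::MachO: return std::make_unique<SymbolMachO>();
  }
  return nullptr; // unreachable
}

int main() {
  auto S = createSymbol(ObjectFormat::ELF);
  return S ? 0 : 1;
}
```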
- MCSymbol *Result = new (*this) MCSymbol(&*NameEntry.first, IsTemporary); - return Result; + return createSymbolImpl(&*NameEntry.first, IsTemporary); } assert(IsTemporary && "Cannot rename non-temporary symbols"); AddSuffix = true; @@ -190,13 +208,13 @@ MCSymbol *MCContext::CreateSymbol(StringRef Name, bool AlwaysAddSuffix) { MCSymbol *MCContext::createTempSymbol(const Twine &Name, bool AlwaysAddSuffix) { SmallString<128> NameSV; raw_svector_ostream(NameSV) << MAI->getPrivateGlobalPrefix() << Name; - return CreateSymbol(NameSV, AlwaysAddSuffix); + return createSymbol(NameSV, AlwaysAddSuffix, true); } MCSymbol *MCContext::createLinkerPrivateTempSymbol() { SmallString<128> NameSV; raw_svector_ostream(NameSV) << MAI->getLinkerPrivateGlobalPrefix() << "tmp"; - return CreateSymbol(NameSV, true); + return createSymbol(NameSV, true, false); } MCSymbol *MCContext::createTempSymbol() { @@ -295,7 +313,7 @@ void MCContext::renameELFSection(MCSectionELF *Section, StringRef Name) { MCSectionELF *MCContext::createELFRelSection(StringRef Name, unsigned Type, unsigned Flags, unsigned EntrySize, - const MCSymbol *Group, + const MCSymbolELF *Group, const MCSectionELF *Associated) { StringMap<bool>::iterator I; bool Inserted; @@ -310,9 +328,9 @@ MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, unsigned Flags, unsigned EntrySize, StringRef Group, unsigned UniqueID, const char *BeginSymName) { - MCSymbol *GroupSym = nullptr; + MCSymbolELF *GroupSym = nullptr; if (!Group.empty()) - GroupSym = getOrCreateSymbol(Group); + GroupSym = cast<MCSymbolELF>(getOrCreateSymbol(Group)); return getELFSection(Section, Type, Flags, EntrySize, GroupSym, UniqueID, BeginSymName, nullptr); @@ -320,7 +338,7 @@ MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, unsigned Flags, unsigned EntrySize, - const MCSymbol *GroupSym, + const MCSymbolELF *GroupSym, unsigned UniqueID, const char *BeginSymName, const MCSectionELF *Associated) { @@ -353,7 +371,7 @@ MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, return Result; } -MCSectionELF *MCContext::createELFGroupSection(const MCSymbol *Group) { +MCSectionELF *MCContext::createELFGroupSection(const MCSymbolELF *Group) { MCSectionELF *Result = new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0, SectionKind::getReadOnly(), 4, Group, ~0, nullptr, nullptr); @@ -447,13 +465,8 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) { /// Remove empty sections from SectionStartEndSyms, to avoid generating /// useless debug info for them. 
void MCContext::finalizeDwarfSections(MCStreamer &MCOS) { - std::vector<MCSection *> Keep; - for (MCSection *Sec : SectionsForRanges) { - if (MCOS.mayHaveInstructions(*Sec)) - Keep.push_back(Sec); - } - SectionsForRanges.clear(); - SectionsForRanges.insert(Keep.begin(), Keep.end()); + SectionsForRanges.remove_if( + [&](MCSection *Sec) { return !MCOS.mayHaveInstructions(*Sec); }); } void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) const { diff --git a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp index 1262e2ac2772..68948d36d65c 100644 --- a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp +++ b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp @@ -88,9 +88,9 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, if (SymbolicOp.AddSymbol.Name) { StringRef Name(SymbolicOp.AddSymbol.Name); MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); - Add = MCSymbolRefExpr::Create(Sym, Ctx); + Add = MCSymbolRefExpr::create(Sym, Ctx); } else { - Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, Ctx); + Add = MCConstantExpr::create((int)SymbolicOp.AddSymbol.Value, Ctx); } } @@ -99,37 +99,37 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, if (SymbolicOp.SubtractSymbol.Name) { StringRef Name(SymbolicOp.SubtractSymbol.Name); MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); - Sub = MCSymbolRefExpr::Create(Sym, Ctx); + Sub = MCSymbolRefExpr::create(Sym, Ctx); } else { - Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, Ctx); + Sub = MCConstantExpr::create((int)SymbolicOp.SubtractSymbol.Value, Ctx); } } const MCExpr *Off = nullptr; if (SymbolicOp.Value != 0) - Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx); + Off = MCConstantExpr::create(SymbolicOp.Value, Ctx); const MCExpr *Expr; if (Sub) { const MCExpr *LHS; if (Add) - LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx); + LHS = MCBinaryExpr::createSub(Add, Sub, Ctx); else - LHS = MCUnaryExpr::CreateMinus(Sub, Ctx); + LHS = MCUnaryExpr::createMinus(Sub, Ctx); if (Off) - Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx); + Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx); else Expr = LHS; } else if (Add) { if (Off) - Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx); + Expr = MCBinaryExpr::createAdd(Add, Off, Ctx); else Expr = Add; } else { if (Off) Expr = Off; else - Expr = MCConstantExpr::Create(0, Ctx); + Expr = MCConstantExpr::create(0, Ctx); } Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind); diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index a7e83f617088..90f96e2cef54 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -98,15 +98,15 @@ static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS, int IntVal) { MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; const MCExpr *Res = - MCSymbolRefExpr::Create(&End, Variant, MCOS.getContext()); + MCSymbolRefExpr::create(&End, Variant, MCOS.getContext()); const MCExpr *RHS = - MCSymbolRefExpr::Create(&Start, Variant, MCOS.getContext()); + MCSymbolRefExpr::create(&Start, Variant, MCOS.getContext()); const MCExpr *Res1 = - MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS, MCOS.getContext()); + MCBinaryExpr::create(MCBinaryExpr::Sub, Res, RHS, MCOS.getContext()); const MCExpr *Res2 = - MCConstantExpr::Create(IntVal, MCOS.getContext()); + MCConstantExpr::create(IntVal, MCOS.getContext()); const MCExpr *Res3 = - MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, MCOS.getContext()); + MCBinaryExpr::create(MCBinaryExpr::Sub, Res1, Res2, 
MCOS.getContext()); return Res3; } @@ -247,7 +247,7 @@ static const MCExpr *forceExpAbs(MCStreamer &OS, const MCExpr* Expr) { MCSymbol *ABS = Context.createTempSymbol(); OS.EmitAssignment(ABS, Expr); - return MCSymbolRefExpr::Create(ABS, Context); + return MCSymbolRefExpr::create(ABS, Context); } static void emitAbsValue(MCStreamer &OS, const MCExpr *Value, unsigned Size) { @@ -616,7 +616,7 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, assert(StartSymbol && "StartSymbol must not be NULL"); assert(EndSymbol && "EndSymbol must not be NULL"); - const MCExpr *Addr = MCSymbolRefExpr::Create( + const MCExpr *Addr = MCSymbolRefExpr::create( StartSymbol, MCSymbolRefExpr::VK_None, context); const MCExpr *Size = MakeStartMinusEndExpr(*MCOS, *StartSymbol, *EndSymbol, 0); @@ -705,12 +705,12 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, assert(EndSymbol && "EndSymbol must not be NULL"); // AT_low_pc, the first address of the default .text section. - const MCExpr *Start = MCSymbolRefExpr::Create( + const MCExpr *Start = MCSymbolRefExpr::create( StartSymbol, MCSymbolRefExpr::VK_None, context); MCOS->EmitValue(Start, AddrSize); // AT_high_pc, the last address of the default .text section. - const MCExpr *End = MCSymbolRefExpr::Create( + const MCExpr *End = MCSymbolRefExpr::create( EndSymbol, MCSymbolRefExpr::VK_None, context); MCOS->EmitValue(End, AddrSize); } @@ -772,7 +772,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitIntValue(Entry.getLineNumber(), 4); // AT_low_pc, start address of the label. - const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry.getLabel(), + const MCExpr *AT_low_pc = MCSymbolRefExpr::create(Entry.getLabel(), MCSymbolRefExpr::VK_None, context); MCOS->EmitValue(AT_low_pc, AddrSize); @@ -812,7 +812,7 @@ static void EmitGenDwarfRanges(MCStreamer *MCOS) { assert(EndSymbol && "EndSymbol must not be NULL"); // Emit a base address selection entry for the start of this section - const MCExpr *SectionStartAddr = MCSymbolRefExpr::Create( + const MCExpr *SectionStartAddr = MCSymbolRefExpr::create( StartSymbol, MCSymbolRefExpr::VK_None, context); MCOS->EmitFill(AddrSize, 0xFF); MCOS->EmitValue(SectionStartAddr, AddrSize); diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp deleted file mode 100644 index 369063431fef..000000000000 --- a/lib/MC/MCELF.cpp +++ /dev/null @@ -1,85 +0,0 @@ -//===- lib/MC/MCELF.cpp - MC ELF ------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF object file writer information. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCELF.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCELFSymbolFlags.h" -#include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/Support/ELF.h" - -namespace llvm { - -void MCELF::SetBinding(MCSymbolData &SD, unsigned Binding) { - assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || - Binding == ELF::STB_WEAK || Binding == ELF::STB_GNU_UNIQUE); - uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift); - SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift)); -} - -unsigned MCELF::GetBinding(const MCSymbolData &SD) { - uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift; - assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || - Binding == ELF::STB_WEAK || Binding == ELF::STB_GNU_UNIQUE); - return Binding; -} - -void MCELF::SetType(MCSymbolData &SD, unsigned Type) { - assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || - Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || - Type == ELF::STT_COMMON || Type == ELF::STT_TLS || - Type == ELF::STT_GNU_IFUNC); - - uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift); - SD.setFlags(OtherFlags | (Type << ELF_STT_Shift)); -} - -unsigned MCELF::GetType(const MCSymbolData &SD) { - uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift; - assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || - Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || - Type == ELF::STT_COMMON || Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC); - return Type; -} - -// Visibility is stored in the first two bits of st_other -// st_other values are stored in the second byte of get/setFlags -void MCELF::SetVisibility(MCSymbolData &SD, unsigned Visibility) { - assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || - Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); - - uint32_t OtherFlags = SD.getFlags() & ~(0x3 << ELF_STV_Shift); - SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift)); -} - -unsigned MCELF::GetVisibility(const MCSymbolData &SD) { - unsigned Visibility = - (SD.getFlags() & (0x3 << ELF_STV_Shift)) >> ELF_STV_Shift; - assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || - Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); - return Visibility; -} - -// Other is stored in the last six bits of st_other -// st_other values are stored in the second byte of get/setFlags -void MCELF::setOther(MCSymbolData &SD, unsigned Other) { - uint32_t OtherFlags = SD.getFlags() & ~(0x3f << ELF_STO_Shift); - SD.setFlags(OtherFlags | (Other << ELF_STO_Shift)); -} - -unsigned MCELF::getOther(const MCSymbolData &SD) { - unsigned Other = - (SD.getFlags() & (0x3f << ELF_STO_Shift)) >> ELF_STO_Shift; - return Other; -} - -} diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp index dc3d6c35489e..bc0ba85a8ff6 100644 --- a/lib/MC/MCELFObjectTargetWriter.cpp +++ b/lib/MC/MCELFObjectTargetWriter.cpp @@ -24,7 +24,7 @@ MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_, IsN64(IsN64_){ } -bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbolData &SD, +bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const { return false; } diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 653a1d2b3a53..e0f4a2ae16a3 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -20,14 +20,13 @@ 
#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" -#include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" @@ -39,7 +38,7 @@ using namespace llvm; bool MCELFStreamer::isBundleLocked() const { - return getCurrentSectionData()->isBundleLocked(); + return getCurrentSectionOnly()->isBundleLocked(); } MCELFStreamer::~MCELFStreamer() { @@ -106,16 +105,16 @@ void MCELFStreamer::InitSections(bool NoExecStack) { SwitchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx)); } -void MCELFStreamer::EmitLabel(MCSymbol *Symbol) { +void MCELFStreamer::EmitLabel(MCSymbol *S) { + auto *Symbol = cast<MCSymbolELF>(S); assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); MCObjectStreamer::EmitLabel(Symbol); const MCSectionELF &Section = static_cast<const MCSectionELF&>(Symbol->getSection()); - MCSymbolData &SD = getAssembler().getSymbolData(*Symbol); if (Section.getFlags() & ELF::SHF_TLS) - MCELF::SetType(SD, ELF::STT_TLS); + Symbol->setType(ELF::STT_TLS); } void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { @@ -146,7 +145,7 @@ static void setSectionAlignmentForBundling(const MCAssembler &Assembler, void MCELFStreamer::ChangeSection(MCSection *Section, const MCExpr *Subsection) { - MCSection *CurSection = getCurrentSectionData(); + MCSection *CurSection = getCurrentSectionOnly(); if (CurSection && isBundleLocked()) report_fatal_error("Unterminated .bundle_lock when changing a section"); @@ -156,19 +155,24 @@ void MCELFStreamer::ChangeSection(MCSection *Section, auto *SectionELF = static_cast<const MCSectionELF *>(Section); const MCSymbol *Grp = SectionELF->getGroup(); if (Grp) - Asm.getOrCreateSymbolData(*Grp); + Asm.registerSymbol(*Grp); this->MCObjectStreamer::ChangeSection(Section, Subsection); - MCSymbol *SectionSymbol = getContext().getOrCreateSectionSymbol(*SectionELF); - if (SectionSymbol->isUndefined()) { - EmitLabel(SectionSymbol); - MCELF::SetType(Asm.getSymbolData(*SectionSymbol), ELF::STT_SECTION); + MCContext &Ctx = getContext(); + auto *Begin = cast_or_null<MCSymbolELF>(Section->getBeginSymbol()); + if (!Begin) { + Begin = Ctx.getOrCreateSectionSymbol(*SectionELF); + Section->setBeginSymbol(Begin); + } + if (Begin->isUndefined()) { + Asm.registerSymbol(*Begin); + Begin->setType(ELF::STT_SECTION); } } void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { - getAssembler().getOrCreateSymbolData(*Symbol); - const MCExpr *Value = MCSymbolRefExpr::Create( + getAssembler().registerSymbol(*Symbol); + const MCExpr *Value = MCSymbolRefExpr::create( Symbol, MCSymbolRefExpr::VK_WEAKREF, getContext()); Alias->setVariableValue(Value); } @@ -192,8 +196,8 @@ static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) { return T2; } -bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, - MCSymbolAttr Attribute) { +bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) { + auto *Symbol = cast<MCSymbolELF>(S); // Indirect symbols are handled differently, to match how 'as' handles // them. This makes writing matching .o files easier. 
if (Attribute == MCSA_IndirectSymbol) { @@ -201,15 +205,15 @@ bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, // important for matching the string table that 'as' generates. IndirectSymbolData ISD; ISD.Symbol = Symbol; - ISD.Section = getCurrentSectionData(); + ISD.Section = getCurrentSectionOnly(); getAssembler().getIndirectSymbols().push_back(ISD); return true; } // Adding a symbol attribute always introduces the symbol, note that an - // important side effect of calling getOrCreateSymbolData here is to register + // important side effect of calling registerSymbol here is to register // the symbol with the assembler. - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + getAssembler().registerSymbol(*Symbol); // The implementation of symbol attributes is designed to match 'as', but it // leaves much to desired. It doesn't really make sense to arbitrarily add and @@ -233,90 +237,81 @@ bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, break; case MCSA_ELF_TypeGnuUniqueObject: - MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), ELF::STT_OBJECT)); - MCELF::SetBinding(SD, ELF::STB_GNU_UNIQUE); - SD.setExternal(true); - BindingExplicitlySet.insert(Symbol); + Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_OBJECT)); + Symbol->setBinding(ELF::STB_GNU_UNIQUE); + Symbol->setExternal(true); break; case MCSA_Global: - MCELF::SetBinding(SD, ELF::STB_GLOBAL); - SD.setExternal(true); - BindingExplicitlySet.insert(Symbol); + Symbol->setBinding(ELF::STB_GLOBAL); + Symbol->setExternal(true); break; case MCSA_WeakReference: case MCSA_Weak: - MCELF::SetBinding(SD, ELF::STB_WEAK); - SD.setExternal(true); - BindingExplicitlySet.insert(Symbol); + Symbol->setBinding(ELF::STB_WEAK); + Symbol->setExternal(true); break; case MCSA_Local: - MCELF::SetBinding(SD, ELF::STB_LOCAL); - SD.setExternal(false); - BindingExplicitlySet.insert(Symbol); + Symbol->setBinding(ELF::STB_LOCAL); + Symbol->setExternal(false); break; case MCSA_ELF_TypeFunction: - MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), - ELF::STT_FUNC)); + Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_FUNC)); break; case MCSA_ELF_TypeIndFunction: - MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), - ELF::STT_GNU_IFUNC)); + Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_GNU_IFUNC)); break; case MCSA_ELF_TypeObject: - MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), - ELF::STT_OBJECT)); + Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_OBJECT)); break; case MCSA_ELF_TypeTLS: - MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), - ELF::STT_TLS)); + Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_TLS)); break; case MCSA_ELF_TypeCommon: // TODO: Emit these as a common symbol. 
- MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), - ELF::STT_OBJECT)); + Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_OBJECT)); break; case MCSA_ELF_TypeNoType: - MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), - ELF::STT_NOTYPE)); + Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_NOTYPE)); break; case MCSA_Protected: - MCELF::SetVisibility(SD, ELF::STV_PROTECTED); + Symbol->setVisibility(ELF::STV_PROTECTED); break; case MCSA_Hidden: - MCELF::SetVisibility(SD, ELF::STV_HIDDEN); + Symbol->setVisibility(ELF::STV_HIDDEN); break; case MCSA_Internal: - MCELF::SetVisibility(SD, ELF::STV_INTERNAL); + Symbol->setVisibility(ELF::STV_INTERNAL); break; } return true; } -void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) { - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); +void MCELFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size, + unsigned ByteAlignment) { + auto *Symbol = cast<MCSymbolELF>(S); + getAssembler().registerSymbol(*Symbol); - if (!BindingExplicitlySet.count(Symbol)) { - MCELF::SetBinding(SD, ELF::STB_GLOBAL); - SD.setExternal(true); + if (!Symbol->isBindingSet()) { + Symbol->setBinding(ELF::STB_GLOBAL); + Symbol->setExternal(true); } - MCELF::SetType(SD, ELF::STT_OBJECT); + Symbol->setType(ELF::STT_OBJECT); - if (MCELF::GetBinding(SD) == ELF_STB_Local) { + if (Symbol->getBinding() == ELF::STB_LOCAL) { MCSection *Section = getAssembler().getContext().getELFSection( ".bss", ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); @@ -325,24 +320,26 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, struct LocalCommon L = {Symbol, Size, ByteAlignment}; LocalCommons.push_back(L); } else { - SD.setCommon(Size, ByteAlignment); + if(Symbol->declareCommon(Size, ByteAlignment)) + report_fatal_error("Symbol: " + Symbol->getName() + + " redeclared as different type"); } - SD.setSize(MCConstantExpr::Create(Size, getContext())); + cast<MCSymbolELF>(Symbol) + ->setSize(MCConstantExpr::create(Size, getContext())); } -void MCELFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - SD.setSize(Value); +void MCELFStreamer::emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) { + Symbol->setSize(Value); } -void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, +void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *S, uint64_t Size, unsigned ByteAlignment) { + auto *Symbol = cast<MCSymbolELF>(S); // FIXME: Should this be caught and done earlier? 
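The setBinding/setType/setVisibility calls above ultimately populate the packed ELF symbol-table fields that the deleted MCELF.cpp helpers emulated with shifted flag bits: st_info carries the binding in its high nibble and the type in its low nibble, and visibility sits in the low two bits of st_other. A standalone sketch of the st_info packing, with the two constants inlined from the ELF specification:

```cpp
#include <cstdint>
#include <cstdio>

// ELF symbol-table packing (per the ELF spec):
//   st_info  = (binding << 4) | (type & 0xf)
//   st_other = visibility in the low two bits
uint8_t makeStInfo(uint8_t Binding, uint8_t Type) {
  return (uint8_t)((Binding << 4) | (Type & 0xf));
}

int main() {
  const uint8_t STB_GLOBAL = 1, STT_FUNC = 2; // values from <elf.h>
  uint8_t Info = makeStInfo(STB_GLOBAL, STT_FUNC);
  std::printf("st_info = 0x%02x\n", (unsigned)Info); // 0x12
}
```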
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - MCELF::SetBinding(SD, ELF::STB_LOCAL); - SD.setExternal(false); - BindingExplicitlySet.insert(Symbol); + getAssembler().registerSymbol(*Symbol); + Symbol->setBinding(ELF::STB_LOCAL); + Symbol->setExternal(false); EmitCommonSymbol(Symbol, Size, ByteAlignment); } @@ -456,8 +453,8 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { case MCSymbolRefExpr::VK_PPC_TLSLD: break; } - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol()); - MCELF::SetType(SD, ELF::STT_TLS); + getAssembler().registerSymbol(symRef.getSymbol()); + cast<MCSymbolELF>(symRef.getSymbol()).setType(ELF::STT_TLS); break; } @@ -506,7 +503,7 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst, MCDataFragment *DF; if (Assembler.isBundlingEnabled()) { - MCSection &Sec = *getCurrentSectionData(); + MCSection &Sec = *getCurrentSectionOnly(); if (Assembler.getRelaxAll() && isBundleLocked()) // If the -mc-relax-all flag is used and we are bundle-locked, we re-use // the current bundle group. @@ -574,7 +571,7 @@ void MCELFStreamer::EmitBundleAlignMode(unsigned AlignPow2) { } void MCELFStreamer::EmitBundleLock(bool AlignToEnd) { - MCSection &Sec = *getCurrentSectionData(); + MCSection &Sec = *getCurrentSectionOnly(); // Sanity checks // @@ -595,7 +592,7 @@ void MCELFStreamer::EmitBundleLock(bool AlignToEnd) { } void MCELFStreamer::EmitBundleUnlock() { - MCSection &Sec = *getCurrentSectionData(); + MCSection &Sec = *getCurrentSectionOnly(); // Sanity checks if (!getAssembler().isBundlingEnabled()) @@ -606,7 +603,7 @@ void MCELFStreamer::EmitBundleUnlock() { report_fatal_error("Empty bundle-locked group is forbidden"); // When the -mc-relax-all flag is used, we emit instructions to fragments - // stored on a stack. When the bundle unlock is emited, we pop a fragment + // stored on a stack. When the bundle unlock is emited, we pop a fragment // from the stack a merge it to the one below. if (getAssembler().getRelaxAll()) { assert(!BundleGroups.empty() && "There are no bundle groups"); @@ -641,7 +638,7 @@ void MCELFStreamer::Flush() { new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &Section); MCFragment *F = new MCFillFragment(0, 0, Size, &Section); - Symbol.getData().setFragment(F); + Symbol.setFragment(F); // Update the maximum alignment of the section if necessary. if (ByteAlignment > Section.getAlignment()) @@ -653,7 +650,7 @@ void MCELFStreamer::Flush() { void MCELFStreamer::FinishImpl() { // Ensure the last section gets aligned if necessary. - MCSection *CurSection = getCurrentSectionData(); + MCSection *CurSection = getCurrentSectionOnly(); setSectionAlignmentForBundling(getAssembler(), CurSection); EmitFrames(nullptr); diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 7f048d7b3d9d..b16245ac3f45 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -30,10 +30,10 @@ STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations"); } } -void MCExpr::print(raw_ostream &OS) const { +void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI) const { switch (getKind()) { case MCExpr::Target: - return cast<MCTargetExpr>(this)->PrintImpl(OS); + return cast<MCTargetExpr>(this)->printImpl(OS, MAI); case MCExpr::Constant: OS << cast<MCConstantExpr>(*this).getValue(); return; @@ -44,10 +44,12 @@ void MCExpr::print(raw_ostream &OS) const { // Parenthesize names that start with $ so that they don't look like // absolute names. 
    bool UseParens = Sym.getName()[0] == '$';
-    if (UseParens)
-      OS << '(' << Sym << ')';
-    else
-      OS << Sym;
+    if (UseParens) {
+      OS << '(';
+      Sym.print(OS, MAI);
+      OS << ')';
+    } else
+      Sym.print(OS, MAI);

     if (SRE.getKind() != MCSymbolRefExpr::VK_None)
       SRE.printVariantKind(OS);

@@ -63,7 +65,7 @@ void MCExpr::print(raw_ostream &OS) const {
     case MCUnaryExpr::Not:   OS << '~'; break;
     case MCUnaryExpr::Plus:  OS << '+'; break;
     }
-    OS << *UE.getSubExpr();
+    UE.getSubExpr()->print(OS, MAI);
     return;
   }

@@ -72,9 +74,11 @@ void MCExpr::print(raw_ostream &OS) const {
     // Only print parens around the LHS if it is non-trivial.
     if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS())) {
-      OS << *BE.getLHS();
+      BE.getLHS()->print(OS, MAI);
     } else {
-      OS << '(' << *BE.getLHS() << ')';
+      OS << '(';
+      BE.getLHS()->print(OS, MAI);
+      OS << ')';
     }

     switch (BE.getOpcode()) {
@@ -111,9 +115,11 @@ void MCExpr::print(raw_ostream &OS) const {
     // Only print parens around the LHS if it is non-trivial.
     if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
-      OS << *BE.getRHS();
+      BE.getRHS()->print(OS, MAI);
     } else {
-      OS << '(' << *BE.getRHS() << ')';
+      OS << '(';
+      BE.getRHS()->print(OS, MAI);
+      OS << ')';
     }
     return;
   }
@@ -131,17 +137,17 @@ void MCExpr::dump() const {

 /* *** */

-const MCBinaryExpr *MCBinaryExpr::Create(Opcode Opc, const MCExpr *LHS,
+const MCBinaryExpr *MCBinaryExpr::create(Opcode Opc, const MCExpr *LHS,
                                          const MCExpr *RHS, MCContext &Ctx) {
   return new (Ctx) MCBinaryExpr(Opc, LHS, RHS);
 }

-const MCUnaryExpr *MCUnaryExpr::Create(Opcode Opc, const MCExpr *Expr,
+const MCUnaryExpr *MCUnaryExpr::create(Opcode Opc, const MCExpr *Expr,
                                        MCContext &Ctx) {
   return new (Ctx) MCUnaryExpr(Opc, Expr);
 }

-const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) {
+const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx) {
   return new (Ctx) MCConstantExpr(Value);
 }

@@ -156,15 +162,15 @@ MCSymbolRefExpr::MCSymbolRefExpr(const MCSymbol *Symbol, VariantKind Kind,
   assert(Symbol);
 }

-const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
+const MCSymbolRefExpr *MCSymbolRefExpr::create(const MCSymbol *Sym,
                                                VariantKind Kind,
                                                MCContext &Ctx) {
   return new (Ctx) MCSymbolRefExpr(Sym, Kind, Ctx.getAsmInfo());
 }

-const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind,
+const MCSymbolRefExpr *MCSymbolRefExpr::create(StringRef Name, VariantKind Kind,
                                                MCContext &Ctx) {
-  return Create(Ctx.getOrCreateSymbol(Name), Kind, Ctx);
+  return create(Ctx.getOrCreateSymbol(Name), Kind, Ctx);
 }

 StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
@@ -400,23 +406,23 @@ void MCTargetExpr::anchor() {}

 /* *** */

-bool MCExpr::EvaluateAsAbsolute(int64_t &Res) const {
-  return EvaluateAsAbsolute(Res, nullptr, nullptr, nullptr);
+bool MCExpr::evaluateAsAbsolute(int64_t &Res) const {
+  return evaluateAsAbsolute(Res, nullptr, nullptr, nullptr);
 }

-bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+bool MCExpr::evaluateAsAbsolute(int64_t &Res,
                                 const MCAsmLayout &Layout) const {
-  return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr);
+  return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr);
 }

-bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+bool MCExpr::evaluateAsAbsolute(int64_t &Res,
                                 const MCAsmLayout &Layout,
                                 const SectionAddrMap &Addrs) const {
-  return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs);
+  return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs);
 }

-bool
-MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
-  return EvaluateAsAbsolute(Res, &Asm, nullptr, nullptr);
+bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
+  return evaluateAsAbsolute(Res, &Asm, nullptr, nullptr);
 }

 bool MCExpr::evaluateKnownAbsolute(int64_t &Res,
@@ -425,7 +431,7 @@ bool MCExpr::evaluateKnownAbsolute(int64_t &Res,
                                    true);
 }

-bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
+bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
                                 const MCAsmLayout *Layout,
                                 const SectionAddrMap *Addrs) const {
   // FIXME: The use if InSet = Addrs is a hack. Setting InSet causes us
@@ -446,7 +452,7 @@ bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
   }

   bool IsRelocatable =
-      EvaluateAsRelocatableImpl(Value, Asm, Layout, nullptr, Addrs, InSet);
+      evaluateAsRelocatableImpl(Value, Asm, Layout, nullptr, Addrs, InSet);

   // Record the current value.
   Res = Value.getConstant();
@@ -468,14 +474,11 @@ static void AttemptToFoldSymbolOffsetDifference(
   if (SA.isUndefined() || SB.isUndefined())
     return;

-  if (!Asm->getWriter().IsSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
+  if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
     return;

-  const MCSymbolData &AD = Asm->getSymbolData(SA);
-  const MCSymbolData &BD = Asm->getSymbolData(SB);
-
-  if (AD.getFragment() == BD.getFragment()) {
-    Addend += (AD.getOffset() - BD.getOffset());
+  if (SA.getFragment() == SB.getFragment()) {
+    Addend += (SA.getOffset() - SB.getOffset());

     // Pointers to Thumb symbols need to have their low-bit set to allow
     // for interworking.
@@ -491,8 +494,8 @@ static void AttemptToFoldSymbolOffsetDifference(
   if (!Layout)
     return;

-  const MCSection &SecA = *AD.getFragment()->getParent();
-  const MCSection &SecB = *BD.getFragment()->getParent();
+  const MCSection &SecA = *SA.getFragment()->getParent();
+  const MCSection &SecB = *SB.getFragment()->getParent();

   if ((&SecA != &SecB) && !Addrs)
     return;
@@ -589,21 +592,28 @@ EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout,
   return true;
 }

-bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
+bool MCExpr::evaluateAsRelocatable(MCValue &Res,
                                    const MCAsmLayout *Layout,
                                    const MCFixup *Fixup) const {
   MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr;
-  return EvaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr,
+  return evaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr,
                                    false);
 }

 bool MCExpr::evaluateAsValue(MCValue &Res, const MCAsmLayout &Layout) const {
   MCAssembler *Assembler = &Layout.getAssembler();
-  return EvaluateAsRelocatableImpl(Res, Assembler, &Layout, nullptr, nullptr,
+  return evaluateAsRelocatableImpl(Res, Assembler, &Layout, nullptr, nullptr,
                                    true);
 }

 static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) {
+  const MCExpr *Expr = Sym.getVariableValue();
+  const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr);
+  if (Inner) {
+    if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF)
+      return false;
+  }
+
   if (InSet)
     return true;
   if (!Asm)
@@ -611,7 +621,7 @@ static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) {
   return !Asm->getWriter().isWeak(Sym);
 }

-bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
+bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
                                        const MCAsmLayout *Layout,
                                        const MCFixup *Fixup,
                                        const SectionAddrMap *Addrs,
@@ -620,7 +630,7 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,

   switch (getKind()) {
   case Target:
-    return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout,
+    return cast<MCTargetExpr>(this)->evaluateAsRelocatableImpl(Res, Layout,
                                                                Fixup);

   case Constant:
@@ -635,7 +645,7 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
     if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None &&
         canExpand(Sym, Asm, InSet)) {
       bool IsMachO = SRE->hasSubsectionsViaSymbols();
-      if (Sym.getVariableValue()->EvaluateAsRelocatableImpl(
+      if (Sym.getVariableValue()->evaluateAsRelocatableImpl(
               Res, Asm, Layout, Fixup, Addrs, InSet || IsMachO)) {
         if (!IsMachO)
           return true;
@@ -661,7 +671,7 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
     const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
     MCValue Value;

-    if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout, Fixup,
+    if (!AUE->getSubExpr()->evaluateAsRelocatableImpl(Value, Asm, Layout, Fixup,
                                                       Addrs, InSet))
       return false;

@@ -695,9 +705,9 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
     const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
     MCValue LHSValue, RHSValue;

-    if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout, Fixup,
+    if (!ABE->getLHS()->evaluateAsRelocatableImpl(LHSValue, Asm, Layout, Fixup,
                                                   Addrs, InSet) ||
-        !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout, Fixup,
+        !ABE->getRHS()->evaluateAsRelocatableImpl(RHSValue, Asm, Layout, Fixup,
                                                   Addrs, InSet))
       return false;

@@ -755,11 +765,11 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
   llvm_unreachable("Invalid assembly expression kind!");
 }

-MCSection *MCExpr::FindAssociatedSection() const {
+MCSection *MCExpr::findAssociatedSection() const {
   switch (getKind()) {
   case Target:
     // We never look through target specific expressions.
-    return cast<MCTargetExpr>(this)->FindAssociatedSection();
+    return cast<MCTargetExpr>(this)->findAssociatedSection();

   case Constant:
     return MCSymbol::AbsolutePseudoSection;
@@ -775,12 +785,12 @@ MCSection *MCExpr::FindAssociatedSection() const {
   }

   case Unary:
-    return cast<MCUnaryExpr>(this)->getSubExpr()->FindAssociatedSection();
+    return cast<MCUnaryExpr>(this)->getSubExpr()->findAssociatedSection();

   case Binary: {
     const MCBinaryExpr *BE = cast<MCBinaryExpr>(this);
-    MCSection *LHS_S = BE->getLHS()->FindAssociatedSection();
-    MCSection *RHS_S = BE->getRHS()->FindAssociatedSection();
+    MCSection *LHS_S = BE->getLHS()->findAssociatedSection();
+    MCSection *RHS_S = BE->getRHS()->findAssociatedSection();

     // If either section is absolute, return the other.
     if (LHS_S == MCSymbol::AbsolutePseudoSection)
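The two renames that dominate this file, Create to create on the MCExpr factories and EvaluateAsAbsolute to evaluateAsAbsolute, touch every caller that builds expressions by hand. A minimal sketch (a fragment, not a full program) of the post-rename usage, assuming an already-initialized MCContext named Ctx; the MCAsmInfo/MCRegisterInfo setup behind it is omitted:

  // Build (2 + 3) with the lowercase factories, then fold it. The
  // layout-free evaluateAsAbsolute overload succeeds here because both
  // operands are constants.
  const MCExpr *LHS = MCConstantExpr::create(2, Ctx);
  const MCExpr *RHS = MCConstantExpr::create(3, Ctx);
  const MCExpr *Sum = MCBinaryExpr::create(MCBinaryExpr::Add, LHS, RHS, Ctx);

  int64_t Res;
  if (Sum->evaluateAsAbsolute(Res))
    assert(Res == 5);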
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 0dc31215ce91..23afe8054840 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -16,6 +16,15 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;

+void llvm::dumpBytes(ArrayRef<uint8_t> bytes, raw_ostream &OS) {
+  static const char hex_rep[] = "0123456789abcdef";
+  for (char i: bytes) {
+    OS << hex_rep[(i & 0xF0) >> 4];
+    OS << hex_rep[i & 0xF];
+    OS << ' ';
+  }
+}
+
 MCInstPrinter::~MCInstPrinter() {
 }

diff --git a/lib/MC/MCLinkerOptimizationHint.cpp b/lib/MC/MCLinkerOptimizationHint.cpp
index 2c9c67cf66af..5f6a57980ad4 100644
--- a/lib/MC/MCLinkerOptimizationHint.cpp
+++ b/lib/MC/MCLinkerOptimizationHint.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
 // - Its argN.
 // <arg1> to <argN> are absolute addresses in the object file, i.e.,
 // relative addresses from the beginning of the object file.
-void MCLOHDirective::Emit_impl(raw_ostream &OutStream,
+void MCLOHDirective::emit_impl(raw_ostream &OutStream,
                                const MachObjectWriter &ObjWriter,
                                const MCAsmLayout &Layout) const {
   encodeULEB128(Kind, OutStream);
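The new dumpBytes helper prints each byte as two lowercase hex digits followed by a space. The same transformation in a self-contained program (illustrative names, standard library only); masking with 0xF0/0xF before shifting is what keeps the logic correct even when the element type is a plain, possibly signed, char as in the LLVM version:

#include <cstdint>
#include <iostream>
#include <vector>

// High nibble first, low nibble second, then a space, matching dumpBytes.
static void dumpBytesDemo(const std::vector<uint8_t> &Bytes, std::ostream &OS) {
  static const char HexRep[] = "0123456789abcdef";
  for (uint8_t B : Bytes)
    OS << HexRep[(B & 0xF0) >> 4] << HexRep[B & 0xF] << ' ';
}

int main() {
  dumpBytesDemo({0x55, 0x48, 0x89, 0xe5}, std::cout); // prints "55 48 89 e5 "
  std::cout << '\n';
  return 0;
}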
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 629734036f5b..53cd1317a3d7 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -18,12 +18,11 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCLinkerOptimizationHint.h"
-#include "llvm/MC/MCMachOSymbolFlags.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCObjectStreamer.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolMachO.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -93,9 +92,6 @@ public:
   void EndCOFFSymbolDef() override {
     llvm_unreachable("macho doesn't support this directive");
   }
-  void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) override {
-    llvm_unreachable("macho doesn't support this directive");
-  }
   void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                              unsigned ByteAlignment) override;
   void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
@@ -162,22 +158,22 @@ void MCMachOStreamer::ChangeSection(MCSection *Section,
   // Output a linker-local symbol so we don't need section-relative local
   // relocations. The linker hates us when we do that.
-  if (LabelSections && !HasSectionLabel[Section]) {
+  if (LabelSections && !HasSectionLabel[Section] &&
+      !Section->getBeginSymbol()) {
     MCSymbol *Label = getContext().createLinkerPrivateTempSymbol();
-    EmitLabel(Label);
+    Section->setBeginSymbol(Label);
     HasSectionLabel[Section] = true;
   }
 }

 void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
                                           MCSymbol *EHSymbol) {
-  MCSymbolData &SD =
-    getAssembler().getOrCreateSymbolData(*Symbol);
-  if (SD.isExternal())
+  getAssembler().registerSymbol(*Symbol);
+  if (Symbol->isExternal())
     EmitSymbolAttribute(EHSymbol, MCSA_Global);
-  if (SD.getFlags() & SF_WeakDefinition)
+  if (cast<MCSymbolMachO>(Symbol)->isWeakDefinition())
     EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition);
-  if (SD.isPrivateExtern())
+  if (Symbol->isPrivateExtern())
     EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern);
 }

@@ -193,7 +189,6 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {

   MCObjectStreamer::EmitLabel(Symbol);

-  MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
   // This causes the reference type flag to be cleared. Darwin 'as' was "trying"
   // to clear the weak reference and weak definition bits too, but the
   // implementation was buggy. For now we just try to match 'as', for
@@ -201,7 +196,7 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
   //
   // FIXME: Cleanup this code, these bits should be emitted based on semantic
   // properties, not on the order of definition, etc.
-  SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask);
+  cast<MCSymbolMachO>(Symbol)->clearReferenceType();
 }

 void MCMachOStreamer::EmitDataRegion(DataRegionData::KindTy Kind) {
@@ -276,10 +271,13 @@ void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
   // Remember that the function is a thumb function. Fixup and relocation
   // values will need adjusted.
   getAssembler().setIsThumbFunc(Symbol);
+  cast<MCSymbolMachO>(Symbol)->setThumbFunc();
 }

-bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Sym,
                                           MCSymbolAttr Attribute) {
+  MCSymbolMachO *Symbol = cast<MCSymbolMachO>(Sym);
+
   // Indirect symbols are handled differently, to match how 'as' handles
   // them. This makes writing matching .o files easier.
   if (Attribute == MCSA_IndirectSymbol) {
@@ -287,15 +285,15 @@ bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
     // important for matching the string table that 'as' generates.
     IndirectSymbolData ISD;
     ISD.Symbol = Symbol;
-    ISD.Section = getCurrentSectionData();
+    ISD.Section = getCurrentSectionOnly();
     getAssembler().getIndirectSymbols().push_back(ISD);
     return true;
   }

   // Adding a symbol attribute always introduces the symbol, note that an
-  // important side effect of calling getOrCreateSymbolData here is to register
+  // important side effect of calling registerSymbol here is to register
   // the symbol with the assembler.
-  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+  getAssembler().registerSymbol(*Symbol);

   // The implementation of symbol attributes is designed to match 'as', but it
   // leaves much to desired. It doesn't really make sense to arbitrarily add and
@@ -321,53 +319,54 @@ bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
     return false;

   case MCSA_Global:
-    SD.setExternal(true);
+    Symbol->setExternal(true);
     // This effectively clears the undefined lazy bit, in Darwin 'as', although
     // it isn't very consistent because it implements this as part of symbol
     // lookup.
     //
     // FIXME: Cleanup this code, these bits should be emitted based on semantic
     // properties, not on the order of definition, etc.
-    SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeUndefinedLazy);
+    Symbol->setReferenceTypeUndefinedLazy(false);
     break;

   case MCSA_LazyReference:
     // FIXME: This requires -dynamic.
-    SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+    Symbol->setNoDeadStrip();
     if (Symbol->isUndefined())
-      SD.setFlags(SD.getFlags() | SF_ReferenceTypeUndefinedLazy);
+      Symbol->setReferenceTypeUndefinedLazy(true);
     break;

   // Since .reference sets the no dead strip bit, it is equivalent to
   // .no_dead_strip in practice.
   case MCSA_Reference:
   case MCSA_NoDeadStrip:
-    SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+    Symbol->setNoDeadStrip();
     break;

   case MCSA_SymbolResolver:
-    SD.setFlags(SD.getFlags() | SF_SymbolResolver);
+    Symbol->setSymbolResolver();
     break;

   case MCSA_PrivateExtern:
-    SD.setExternal(true);
-    SD.setPrivateExtern(true);
+    Symbol->setExternal(true);
+    Symbol->setPrivateExtern(true);
     break;

   case MCSA_WeakReference:
     // FIXME: This requires -dynamic.
     if (Symbol->isUndefined())
-      SD.setFlags(SD.getFlags() | SF_WeakReference);
+      Symbol->setWeakReference();
     break;

   case MCSA_WeakDefinition:
     // FIXME: 'as' enforces that this is defined and global. The manual claims
     // it has to be in a coalesced section, but this isn't enforced.
-    SD.setFlags(SD.getFlags() | SF_WeakDefinition);
+    Symbol->setWeakDefinition();
     break;

   case MCSA_WeakDefAutoPrivate:
-    SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference);
+    Symbol->setWeakDefinition();
+    Symbol->setWeakReference();
     break;
   }

@@ -376,10 +375,8 @@ bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,

 void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
   // Encode the 'desc' value into the lowest implementation defined bits.
-  assert(DescValue == (DescValue & SF_DescFlagsMask) &&
-         "Invalid .desc value!");
-  getAssembler().getOrCreateSymbolData(*Symbol).setFlags(
-      DescValue & SF_DescFlagsMask);
+  getAssembler().registerSymbol(*Symbol);
+  cast<MCSymbolMachO>(Symbol)->setDesc(DescValue);
 }

 void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -389,9 +386,9 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,

   AssignSection(Symbol, nullptr);

-  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-  SD.setExternal(true);
-  SD.setCommon(Size, ByteAlignment);
+  getAssembler().registerSymbol(*Symbol);
+  Symbol->setExternal(true);
+  Symbol->setCommon(Size, ByteAlignment);
 }

 void MCMachOStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -414,17 +411,17 @@ void MCMachOStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,

   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");

-  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+  getAssembler().registerSymbol(*Symbol);

   // Emit an align fragment if necessary.
   if (ByteAlignment != 1)
     new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, Section);

-  MCFragment *F = new MCFillFragment(0, 0, Size, Section);
-  SD.setFragment(F);
-
   AssignSection(Symbol, Section);

+  MCFragment *F = new MCFillFragment(0, 0, Size, Section);
+  Symbol->setFragment(F);
+
   // Update the maximum alignment on the zero fill section if necessary.
   if (ByteAlignment > Section->getAlignment())
     Section->setAlignment(ByteAlignment);
@@ -466,11 +463,11 @@ void MCMachOStreamer::FinishImpl() {
   // defining symbols.
   DenseMap<const MCFragment *, const MCSymbol *> DefiningSymbolMap;
   for (const MCSymbol &Symbol : getAssembler().symbols()) {
-    MCSymbolData &SD = Symbol.getData();
-    if (getAssembler().isSymbolLinkerVisible(Symbol) && SD.getFragment()) {
+    if (getAssembler().isSymbolLinkerVisible(Symbol) && Symbol.getFragment()) {
       // An atom defining symbol should never be internal to a fragment.
-      assert(SD.getOffset() == 0 && "Invalid offset in atom defining symbol!");
-      DefiningSymbolMap[SD.getFragment()] = &Symbol;
+      assert(Symbol.getOffset() == 0 &&
+             "Invalid offset in atom defining symbol!");
+      DefiningSymbolMap[Symbol.getFragment()] = &Symbol;
     }
   }
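Throughout this streamer the conversion pattern is the same: SD.setFlags(SD.getFlags() | SF_Foo) on a detached MCSymbolData becomes a named setter on MCSymbolMachO itself. A toy model of that encapsulation in plain C++; the SF_* bit positions below are illustrative, not the real Mach-O n_desc encoding:

#include <cassert>
#include <cstdint>

// Named setters over one packed flags word, in the spirit of MCSymbolMachO.
class ToyMachOSymbol {
  uint16_t Flags = 0;
  enum : uint16_t {
    SF_NoDeadStrip = 1 << 0,
    SF_WeakReference = 1 << 1,
    SF_WeakDefinition = 1 << 2,
  };

public:
  void setNoDeadStrip() { Flags |= SF_NoDeadStrip; }
  void setWeakReference() { Flags |= SF_WeakReference; }
  void setWeakDefinition() { Flags |= SF_WeakDefinition; }
  bool isWeakDefinition() const { return Flags & SF_WeakDefinition; }
};

int main() {
  ToyMachOSymbol S;
  S.setWeakDefinition(); // MCSA_WeakDefinition
  S.setWeakReference();  // MCSA_WeakDefAutoPrivate sets both, as above
  assert(S.isWeakDefinition());
  return 0;
}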
diff --git a/lib/MC/MCMachObjectTargetWriter.cpp b/lib/MC/MCMachObjectTargetWriter.cpp
index 146cebf01a3a..4ffd6a78a61f 100644
--- a/lib/MC/MCMachObjectTargetWriter.cpp
+++ b/lib/MC/MCMachObjectTargetWriter.cpp
@@ -11,12 +11,9 @@
 using namespace llvm;

-MCMachObjectTargetWriter::MCMachObjectTargetWriter(
-    bool Is64Bit_, uint32_t CPUType_, uint32_t CPUSubtype_,
-    bool UseAggressiveSymbolFolding_)
-    : Is64Bit(Is64Bit_), CPUType(CPUType_), CPUSubtype(CPUSubtype_),
-      UseAggressiveSymbolFolding(UseAggressiveSymbolFolding_) {
-}
+MCMachObjectTargetWriter::MCMachObjectTargetWriter(bool Is64Bit_,
+                                                   uint32_t CPUType_,
+                                                   uint32_t CPUSubtype_)
+    : Is64Bit(Is64Bit_), CPUType(CPUType_), CPUSubtype(CPUSubtype_) {}

-MCMachObjectTargetWriter::~MCMachObjectTargetWriter() {
-}
+MCMachObjectTargetWriter::~MCMachObjectTargetWriter() {}
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index e99f036af160..83a08e28a816 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -39,7 +39,7 @@ static bool useCompactUnwind(const Triple &T) {
   return false;
 }

-void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
+void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
   // MachO
   SupportsWeakOmittedEHFrame = false;

@@ -241,7 +241,7 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
   TLSExtraDataSection = TLSTLVSection;
 }

-void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
+void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
   switch (T.getArch()) {
   case Triple::mips:
   case Triple::mipsel:
@@ -324,10 +324,16 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
   case Triple::mipsel:
   case Triple::mips64:
   case Triple::mips64el:
-    // MIPS uses indirect pointer to refer personality functions, so that the
-    // eh_frame section can be read-only. DW.ref.personality will be generated
-    // for relocation.
+    // MIPS uses indirect pointer to refer personality functions and types, so
+    // that the eh_frame section can be read-only. DW.ref.personality will be
+    // generated for relocation.
     PersonalityEncoding = dwarf::DW_EH_PE_indirect;
+    // FIXME: The N64 ABI probably ought to use DW_EH_PE_sdata8 but we can't
+    // identify N64 from just a triple.
+    TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+                    dwarf::DW_EH_PE_sdata4;
+    // We don't support PC-relative LSDA references in GAS so we use the default
+    // DW_EH_PE_absptr for those.
     break;
   case Triple::ppc64:
   case Triple::ppc64le:
@@ -514,7 +520,7 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
       Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
 }

-void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
+void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
   bool IsWoA = T.getArch() == Triple::arm || T.getArch() == Triple::thumb;

   CommDirectiveSupportsAlignment = true;
@@ -714,6 +720,9 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
       ".xdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
       SectionKind::getDataRel());

+  SXDataSection = Ctx->getCOFFSection(".sxdata", COFF::IMAGE_SCN_LNK_INFO,
+                                      SectionKind::getMetadata());
+
   TLSDataSection = Ctx->getCOFFSection(
       ".tls$", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
                    COFF::IMAGE_SCN_MEM_WRITE,
@@ -756,15 +765,15 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm,
        Arch == Triple::UnknownArch) &&
       TT.isOSBinFormatMachO()) {
     Env = IsMachO;
-    InitMachOMCObjectFileInfo(TT);
+    initMachOMCObjectFileInfo(TT);
   } else if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
               Arch == Triple::arm || Arch == Triple::thumb) &&
              (TT.isOSWindows() && TT.getObjectFormat() == Triple::COFF)) {
     Env = IsCOFF;
-    InitCOFFMCObjectFileInfo(TT);
+    initCOFFMCObjectFileInfo(TT);
   } else {
     Env = IsELF;
-    InitELFMCObjectFileInfo(TT);
+    initELFMCObjectFileInfo(TT);
   }
 }
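The new MIPS TTypeEncoding is composed from DWARF exception-header encoding bits. The arithmetic is checkable standalone; the constants below are the standard DW_EH_PE_* values from the DWARF EH spec, so verify them against llvm/Support/Dwarf.h before relying on them:

#include <cassert>
#include <cstdint>
#include <cstdio>

enum : uint8_t {
  DW_EH_PE_sdata4 = 0x0b,   // signed 4-byte value
  DW_EH_PE_pcrel = 0x10,    // PC-relative
  DW_EH_PE_indirect = 0x80, // value is the address of the real value
};

int main() {
  // The combination the MIPS hunk above selects for type-table references.
  const uint8_t TTypeEncoding =
      DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
  assert(TTypeEncoding == 0x9b);
  std::printf("TTypeEncoding = 0x%02x\n", TTypeEncoding);
  return 0;
}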
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 176f5e723d03..6de02bcb02d8 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -29,7 +29,7 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
     : MCStreamer(Context),
       Assembler(new MCAssembler(Context, TAB, *Emitter_,
                                 *TAB.createObjectWriter(OS), OS)),
-      CurSectionData(nullptr), EmitEHFrame(true), EmitDebugFrame(false) {}
+      EmitEHFrame(true), EmitDebugFrame(false) {}

 MCObjectStreamer::~MCObjectStreamer() {
   delete &Assembler->getBackend();
@@ -42,12 +42,13 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
   if (PendingLabels.size()) {
     if (!F) {
       F = new MCDataFragment();
-      CurSectionData->getFragmentList().insert(CurInsertionPoint, F);
-      F->setParent(CurSectionData);
+      MCSection *CurSection = getCurrentSectionOnly();
+      CurSection->getFragmentList().insert(CurInsertionPoint, F);
+      F->setParent(CurSection);
     }
-    for (MCSymbolData *SD : PendingLabels) {
-      SD->setFragment(F);
-      SD->setOffset(FOffset);
+    for (MCSymbol *Sym : PendingLabels) {
+      Sym->setFragment(F);
+      Sym->setOffset(FOffset);
     }
     PendingLabels.clear();
   }
@@ -56,30 +57,23 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
 bool MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi,
                                               const MCSymbol *Lo,
                                               unsigned Size) {
-  // Must have symbol data.
-  if (!Assembler->hasSymbolData(*Hi) || !Assembler->hasSymbolData(*Lo))
-    return false;
-  auto &HiD = Assembler->getSymbolData(*Hi);
-  auto &LoD = Assembler->getSymbolData(*Lo);
-
   // Must both be assigned to the same (valid) fragment.
-  if (!HiD.getFragment() || HiD.getFragment() != LoD.getFragment())
+  if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment())
     return false;

   // Must be a data fragment.
-  if (!isa<MCDataFragment>(HiD.getFragment()))
+  if (!isa<MCDataFragment>(Hi->getFragment()))
     return false;

-  assert(HiD.getOffset() >= LoD.getOffset() &&
+  assert(Hi->getOffset() >= Lo->getOffset() &&
          "Expected Hi to be greater than Lo");

-  EmitIntValue(HiD.getOffset() - LoD.getOffset(), Size);
+  EmitIntValue(Hi->getOffset() - Lo->getOffset(), Size);
   return true;
 }

 void MCObjectStreamer::reset() {
   if (Assembler)
     Assembler->reset();
-  CurSectionData = nullptr;
   CurInsertionPoint = MCSection::iterator();
   EmitEHFrame = true;
   EmitDebugFrame = false;
@@ -99,9 +93,9 @@ void MCObjectStreamer::EmitFrames(MCAsmBackend *MAB) {
 }

 MCFragment *MCObjectStreamer::getCurrentFragment() const {
-  assert(getCurrentSectionData() && "No current section!");
+  assert(getCurrentSectionOnly() && "No current section!");

-  if (CurInsertionPoint != getCurrentSectionData()->getFragmentList().begin())
+  if (CurInsertionPoint != getCurrentSectionOnly()->getFragmentList().begin())
     return std::prev(CurInsertionPoint);

   return nullptr;
@@ -120,7 +114,7 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() {
 }

 void MCObjectStreamer::visitUsedSymbol(const MCSymbol &Sym) {
-  Assembler->getOrCreateSymbolData(Sym);
+  Assembler->registerSymbol(Sym);
 }

 void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) {
@@ -138,7 +132,7 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,

   // Avoid fixups when possible.
   int64_t AbsValue;
-  if (Value->EvaluateAsAbsolute(AbsValue, getAssembler())) {
+  if (Value->evaluateAsAbsolute(AbsValue, getAssembler())) {
     EmitIntValue(AbsValue, Size);
     return;
   }
@@ -162,8 +156,8 @@ void MCObjectStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
 void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
   MCStreamer::EmitLabel(Symbol);

-  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-  assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+  getAssembler().registerSymbol(*Symbol);
+  assert(!Symbol->getFragment() && "Unexpected fragment on symbol data!");

   // If there is a current fragment, mark the symbol as pointing into it.
   // Otherwise queue the label and set its fragment pointer when we emit the
@@ -171,16 +165,16 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
   auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
   if (F && !(getAssembler().isBundlingEnabled() &&
              getAssembler().getRelaxAll())) {
-    SD.setFragment(F);
-    SD.setOffset(F->getContents().size());
+    Symbol->setFragment(F);
+    Symbol->setOffset(F->getContents().size());
   } else {
-    PendingLabels.push_back(&SD);
+    PendingLabels.push_back(Symbol);
   }
 }

 void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
   int64_t IntValue;
-  if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+  if (Value->evaluateAsAbsolute(IntValue, getAssembler())) {
     EmitULEB128IntValue(IntValue);
     return;
   }
@@ -189,7 +183,7 @@ void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {

 void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
   int64_t IntValue;
-  if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+  if (Value->evaluateAsAbsolute(IntValue, getAssembler())) {
     EmitSLEB128IntValue(IntValue);
     return;
   }
@@ -212,21 +206,20 @@ bool MCObjectStreamer::changeSectionImpl(MCSection *Section,
   flushPendingLabels(nullptr);

   bool Created = getAssembler().registerSection(*Section);
-  CurSectionData = Section;

   int64_t IntSubsection = 0;
   if (Subsection &&
-      !Subsection->EvaluateAsAbsolute(IntSubsection, getAssembler()))
+      !Subsection->evaluateAsAbsolute(IntSubsection, getAssembler()))
     report_fatal_error("Cannot evaluate subsection number");
   if (IntSubsection < 0 || IntSubsection > 8192)
     report_fatal_error("Subsection number out of range");
   CurInsertionPoint =
-    CurSectionData->getSubsectionInsertionPoint(unsigned(IntSubsection));
+      Section->getSubsectionInsertionPoint(unsigned(IntSubsection));
   return Created;
 }

 void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
-  getAssembler().getOrCreateSymbolData(*Symbol);
+  getAssembler().registerSymbol(*Symbol);
   MCStreamer::EmitAssignment(Symbol, Value);
 }

@@ -238,7 +231,7 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
                                        const MCSubtargetInfo &STI) {
   MCStreamer::EmitInstruction(Inst, STI);

-  MCSection *Sec = getCurrentSectionData();
+  MCSection *Sec = getCurrentSectionOnly();
   Sec->setHasInstructions(true);

   // Now that a machine instruction has been assembled into this section, make
@@ -323,10 +316,10 @@ static const MCExpr *buildSymbolDiff(MCObjectStreamer &OS, const MCSymbol *A,
                                      const MCSymbol *B) {
   MCContext &Context = OS.getContext();
   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
-  const MCExpr *ARef = MCSymbolRefExpr::Create(A, Variant, Context);
-  const MCExpr *BRef = MCSymbolRefExpr::Create(B, Variant, Context);
+  const MCExpr *ARef = MCSymbolRefExpr::create(A, Variant, Context);
+  const MCExpr *BRef = MCSymbolRefExpr::create(B, Variant, Context);
   const MCExpr *AddrDelta =
-    MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context);
+      MCBinaryExpr::create(MCBinaryExpr::Sub, ARef, BRef, Context);
   return AddrDelta;
 }

@@ -352,7 +345,7 @@ void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
   }
   const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel);
   int64_t Res;
-  if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+  if (AddrDelta->evaluateAsAbsolute(Res, getAssembler())) {
     MCDwarfLineAddr::Emit(this, LineDelta, Res);
     return;
   }
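buildSymbolDiff above composes the expression A - B from two plain symbol references; its callers then try evaluateAsAbsolute and fall back to emitting a fixup. A sketch of that two-step pattern, assuming a live MCObjectStreamer OS and two symbols A and B already attached to fragments (a fragment, not a full program):

  // Compose A - B, as buildSymbolDiff does.
  MCContext &Context = OS.getContext();
  const MCExpr *ARef =
      MCSymbolRefExpr::create(A, MCSymbolRefExpr::VK_None, Context);
  const MCExpr *BRef =
      MCSymbolRefExpr::create(B, MCSymbolRefExpr::VK_None, Context);
  const MCExpr *Delta =
      MCBinaryExpr::create(MCBinaryExpr::Sub, ARef, BRef, Context);

  // Fold to a constant when layout already determines it; otherwise emit
  // the expression and let the assembler resolve it later.
  int64_t Res;
  if (Delta->evaluateAsAbsolute(Res, OS.getAssembler()))
    OS.EmitIntValue(Res, 4);
  else
    OS.EmitValue(Delta, 4);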
@@ -363,7 +356,7 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
                                                  const MCSymbol *Label) {
   const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel);
   int64_t Res;
-  if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+  if (AddrDelta->evaluateAsAbsolute(Res, getAssembler())) {
     MCDwarfFrameEmitter::EmitAdvanceLoc(*this, Res);
     return;
   }
@@ -398,7 +391,7 @@ void MCObjectStreamer::EmitCodeAlignment(unsigned ByteAlignment,
 bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
                                          unsigned char Value) {
   int64_t Res;
-  if (Offset->EvaluateAsAbsolute(Res, getAssembler())) {
+  if (Offset->evaluateAsAbsolute(Res, getAssembler())) {
     insert(new MCOrgFragment(*Offset, Value));
     return false;
   }
@@ -407,11 +400,11 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
   EmitLabel(CurrentPos);
   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
   const MCExpr *Ref =
-    MCSymbolRefExpr::Create(CurrentPos, Variant, getContext());
+      MCSymbolRefExpr::create(CurrentPos, Variant, getContext());
   const MCExpr *Delta =
-    MCBinaryExpr::Create(MCBinaryExpr::Sub, Offset, Ref, getContext());
+      MCBinaryExpr::create(MCBinaryExpr::Sub, Offset, Ref, getContext());

-  if (!Delta->EvaluateAsAbsolute(Res, getAssembler()))
+  if (!Delta->evaluateAsAbsolute(Res, getAssembler()))
     return true;
   EmitFill(Res, Value);
   return false;
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
index 5cc629b81ba0..347903408737 100644
--- a/lib/MC/MCObjectWriter.cpp
+++ b/lib/MC/MCObjectWriter.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
 MCObjectWriter::~MCObjectWriter() {
 }

-bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved(
+bool MCObjectWriter::isSymbolRefDifferenceFullyResolved(
     const MCAssembler &Asm, const MCSymbolRefExpr *A, const MCSymbolRefExpr *B,
     bool InSet) const {
   // Modified symbol references cannot be resolved.
@@ -30,16 +30,14 @@ bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved(
   if (SA.isUndefined() || SB.isUndefined())
     return false;

-  const MCSymbolData &DataA = Asm.getSymbolData(SA);
-  const MCSymbolData &DataB = Asm.getSymbolData(SB);
-  if(!DataA.getFragment() || !DataB.getFragment())
+  if (!SA.getFragment() || !SB.getFragment())
     return false;

-  return IsSymbolRefDifferenceFullyResolvedImpl(Asm, SA, *DataB.getFragment(),
+  return isSymbolRefDifferenceFullyResolvedImpl(Asm, SA, *SB.getFragment(),
                                                 InSet, false);
 }

-bool MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+bool MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
     const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
     bool InSet, bool IsPCRel) const {
   const MCSection &SecA = SymA.getSection();
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 1e805fdfcf15..20366dc4e4f5 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -630,13 +630,15 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {

   // If we are generating dwarf for assembly source files save the initial text
   // section and generate a .file directive.
   if (getContext().getGenDwarfForAssembly()) {
-    MCSymbol *SectionStartSym = getContext().createTempSymbol();
-    getStreamer().EmitLabel(SectionStartSym);
     MCSection *Sec = getStreamer().getCurrentSection().first;
+    if (!Sec->getBeginSymbol()) {
+      MCSymbol *SectionStartSym = getContext().createTempSymbol();
+      getStreamer().EmitLabel(SectionStartSym);
+      Sec->setBeginSymbol(SectionStartSym);
+    }
     bool InsertResult = getContext().addGenDwarfSection(Sec);
     assert(InsertResult && ".text section should not have debug info yet");
     (void)InsertResult;
-    Sec->setBeginSymbol(SectionStartSym);
     getContext().setGenDwarfFileNumber(getStreamer().EmitDwarfFileDirective(
         0, StringRef(), getContext().getMainFileName()));
   }
@@ -787,7 +789,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     Lex(); // Eat the operator.
     if (parsePrimaryExpr(Res, EndLoc))
       return true;
-    Res = MCUnaryExpr::CreateLNot(Res, getContext());
+    Res = MCUnaryExpr::createLNot(Res, getContext());
     return false;
   case AsmToken::Dollar:
   case AsmToken::At:
@@ -801,7 +803,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     // temporary label to the streamer and refer to it.
     MCSymbol *Sym = Ctx.createTempSymbol();
     Out.EmitLabel(Sym);
-    Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+    Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
                                   getContext());
     EndLoc = FirstTokenLoc;
     return false;
@@ -869,7 +871,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     }

     // Otherwise create a symbol ref.
-    Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
+    Res = MCSymbolRefExpr::create(Sym, Variant, getContext());
     return false;
   }
   case AsmToken::BigNum:
@@ -877,7 +879,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
   case AsmToken::Integer: {
     SMLoc Loc = getTok().getLoc();
     int64_t IntVal = getTok().getIntVal();
-    Res = MCConstantExpr::Create(IntVal, getContext());
+    Res = MCConstantExpr::create(IntVal, getContext());
     EndLoc = Lexer.getTok().getEndLoc();
     Lex(); // Eat token.
     // Look for 'b' or 'f' following an Integer as a directional label
@@ -895,7 +897,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
       if (IDVal == "f" || IDVal == "b") {
        MCSymbol *Sym = Ctx.getDirectionalLocalSymbol(IntVal, IDVal == "b");
-        Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
+        Res = MCSymbolRefExpr::create(Sym, Variant, getContext());
         if (IDVal == "b" && Sym->isUndefined())
           return Error(Loc, "invalid reference to undefined symbol");
         EndLoc = Lexer.getTok().getEndLoc();
@@ -907,7 +909,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
   case AsmToken::Real: {
     APFloat RealVal(APFloat::IEEEdouble, getTok().getString());
     uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
-    Res = MCConstantExpr::Create(IntVal, getContext());
+    Res = MCConstantExpr::create(IntVal, getContext());
     EndLoc = Lexer.getTok().getEndLoc();
     Lex(); // Eat token.
     return false;
@@ -917,7 +919,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     // temporary label to the streamer and refer to it.
     MCSymbol *Sym = Ctx.createTempSymbol();
     Out.EmitLabel(Sym);
-    Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
+    Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
     EndLoc = Lexer.getTok().getEndLoc();
     Lex(); // Eat identifier.
     return false;
@@ -934,19 +936,19 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     Lex(); // Eat the operator.
     if (parsePrimaryExpr(Res, EndLoc))
       return true;
-    Res = MCUnaryExpr::CreateMinus(Res, getContext());
+    Res = MCUnaryExpr::createMinus(Res, getContext());
     return false;
   case AsmToken::Plus:
     Lex(); // Eat the operator.
     if (parsePrimaryExpr(Res, EndLoc))
       return true;
-    Res = MCUnaryExpr::CreatePlus(Res, getContext());
+    Res = MCUnaryExpr::createPlus(Res, getContext());
     return false;
   case AsmToken::Tilde:
     Lex(); // Eat the operator.
     if (parsePrimaryExpr(Res, EndLoc))
       return true;
-    Res = MCUnaryExpr::CreateNot(Res, getContext());
+    Res = MCUnaryExpr::createNot(Res, getContext());
     return false;
   }
 }
@@ -979,7 +981,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E,
       return E;
     }

-    return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+    return MCSymbolRefExpr::create(&SRE->getSymbol(), Variant, getContext());
   }

   case MCExpr::Unary: {
@@ -987,7 +989,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E,
     const MCExpr *Sub = applyModifierToExpr(UE->getSubExpr(), Variant);
     if (!Sub)
       return nullptr;
-    return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext());
+    return MCUnaryExpr::create(UE->getOpcode(), Sub, getContext());
   }

   case MCExpr::Binary: {
@@ -1003,7 +1005,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E,
     if (!RHS)
       RHS = BE->getRHS();

-    return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+    return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, getContext());
   }
   }

@@ -1052,8 +1054,8 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {

   // Try to constant fold it up front, if possible.
   int64_t Value;
-  if (Res->EvaluateAsAbsolute(Value))
-    Res = MCConstantExpr::Create(Value, getContext());
+  if (Res->evaluateAsAbsolute(Value))
+    Res = MCConstantExpr::create(Value, getContext());

   return false;
 }
@@ -1070,7 +1072,7 @@ bool AsmParser::parseAbsoluteExpression(int64_t &Res) {
   if (parseExpression(Expr))
     return true;

-  if (!Expr->EvaluateAsAbsolute(Res))
+  if (!Expr->evaluateAsAbsolute(Res))
     return Error(StartLoc, "expected absolute expression");

   return false;
@@ -1181,7 +1183,7 @@ bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
       return true;

     // Merge LHS and RHS according to operator.
-    Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext());
+    Res = MCBinaryExpr::create(Kind, Res, RHS, getContext());
   }
 }

@@ -1947,7 +1949,7 @@ bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg) {
   if (Vararg) {
     if (Lexer.isNot(AsmToken::EndOfStatement)) {
       StringRef Str = parseStringToEndOfStatement();
-      MA.push_back(AsmToken(AsmToken::String, Str));
+      MA.emplace_back(AsmToken::String, Str);
     }
     return false;
   }
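parseBinOpRHS above is a precedence climber: it folds operators at or above a minimum precedence into Res, recursing when a tighter-binding operator follows. A minimal standalone variant over single-digit atoms with + (precedence 1) and * (precedence 2); the control flow differs slightly from the LLVM routine, but the climbing logic is the same:

#include <cassert>

static const char *P; // cursor into the expression string

static long parseAtom() { return *P++ - '0'; }
static int prec(char Op) { return Op == '+' ? 1 : Op == '*' ? 2 : 0; }

static long parseBinOpRHS(int MinPrec, long LHS) {
  while (prec(*P) >= MinPrec && prec(*P) > 0) {
    char Op = *P++;
    long RHS = parseAtom();
    // If the next operator binds tighter than Op, it takes RHS first.
    while (prec(*P) > prec(Op))
      RHS = parseBinOpRHS(prec(*P), RHS);
    LHS = (Op == '+') ? LHS + RHS : LHS * RHS;
  }
  return LHS;
}

int main() {
  P = "1+2*3+4";
  assert(parseBinOpRHS(1, parseAtom()) == 11); // * binds tighter than +
  return 0;
}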
@@ -4344,8 +4346,7 @@ MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
   StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);

   // We Are Anonymous.
-  MacroLikeBodies.push_back(
-      MCAsmMacro(StringRef(), Body, MCAsmMacroParameters()));
+  MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters());
   return &MacroLikeBodies.back();
 }

@@ -4377,7 +4378,7 @@ bool AsmParser::parseDirectiveRept(SMLoc DirectiveLoc, StringRef Dir) {
     return true;

   int64_t Count;
-  if (!CountExpr->EvaluateAsAbsolute(Count)) {
+  if (!CountExpr->evaluateAsAbsolute(Count)) {
     eatToEndOfStatement();
     return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
   }
@@ -4488,7 +4489,7 @@ bool AsmParser::parseDirectiveIrpc(SMLoc DirectiveLoc) {
   StringRef Values = A.front().front().getString();
   for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
     MCAsmMacroArgument Arg;
-    Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I + 1)));
+    Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1));

     // Note that the AtPseudoVariable is enabled for instantiations of .irpc.
     // This is undocumented, but GAS seems to support it.
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 82f7f2280521..f09bce005d6a 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -57,6 +57,7 @@ class COFFAsmParser : public MCAsmParserExtension {
     addDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef");
     addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecRel32>(".secrel32");
     addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecIdx>(".secidx");
+    addDirectiveHandler<&COFFAsmParser::ParseDirectiveSafeSEH>(".safeseh");
     addDirectiveHandler<&COFFAsmParser::ParseDirectiveLinkOnce>(".linkonce");

     // Win64 EH directives.
@@ -118,6 +119,7 @@ class COFFAsmParser : public MCAsmParserExtension {
   bool ParseDirectiveEndef(StringRef, SMLoc);
   bool ParseDirectiveSecRel32(StringRef, SMLoc);
   bool ParseDirectiveSecIdx(StringRef, SMLoc);
+  bool ParseDirectiveSafeSEH(StringRef, SMLoc);
   bool parseCOMDATType(COFF::COMDATType &Type);
   bool ParseDirectiveLinkOnce(StringRef, SMLoc);

@@ -359,7 +361,7 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
   COFF::COMDATType Type = (COFF::COMDATType)0;
   StringRef COMDATSymName;
   if (getLexer().is(AsmToken::Comma)) {
-    Type = COFF::IMAGE_COMDAT_SELECT_ANY;;
+    Type = COFF::IMAGE_COMDAT_SELECT_ANY;
     Lex();

     Flags |= COFF::IMAGE_SCN_LNK_COMDAT;
@@ -453,6 +455,21 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
   return false;
 }

+bool COFFAsmParser::ParseDirectiveSafeSEH(StringRef, SMLoc) {
+  StringRef SymbolID;
+  if (getParser().parseIdentifier(SymbolID))
+    return TokError("expected identifier in directive");
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
+
+  Lex();
+  getStreamer().EmitCOFFSafeSEH(Symbol);
+  return false;
+}
+
 bool COFFAsmParser::ParseDirectiveSecIdx(StringRef, SMLoc) {
   StringRef SymbolID;
   if (getParser().parseIdentifier(SymbolID))
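The new ParseDirectiveSafeSEH follows the usual extension-directive shape: parse one identifier, reject trailing tokens, then hand the symbol to the streamer. In assembler input this enables, for example, a line like ".safeseh _except_handler" on 32-bit x86 COFF. What such a line reduces to once parsed, sketched inside an MCAsmParserExtension with a hypothetical handler name:

  // '.safeseh _except_handler' boils down to:
  MCSymbol *Handler = getContext().getOrCreateSymbol("_except_handler");
  getStreamer().EmitCOFFSafeSEH(Handler);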
TokError("expected identifier in directive"); - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + MCSymbolELF *Sym = cast<MCSymbolELF>(getContext().getOrCreateSymbol(Name)); if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); @@ -222,7 +222,7 @@ bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - getStreamer().EmitELFSize(Sym, Expr); + getStreamer().emitELFSize(Sym, Expr); return false; } @@ -537,9 +537,11 @@ EndStmt: if (getContext().getDwarfVersion() <= 2) Warning(loc, "DWARF2 only supports one section per compilation unit"); - MCSymbol *SectionStartSymbol = getContext().createTempSymbol(); - getStreamer().EmitLabel(SectionStartSymbol); - ELFSection->setBeginSymbol(SectionStartSymbol); + if (!ELFSection->getBeginSymbol()) { + MCSymbol *SectionStartSymbol = getContext().createTempSymbol(); + getStreamer().EmitLabel(SectionStartSymbol); + ELFSection->setBeginSymbol(SectionStartSymbol); + } } } @@ -661,7 +663,7 @@ bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) { MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName); MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - const MCExpr *Value = MCSymbolRefExpr::Create(Sym, getContext()); + const MCExpr *Value = MCSymbolRefExpr::create(Sym, getContext()); getStreamer().EmitAssignment(Alias, Value); return false; diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp index 04f932bfeec0..9152f2b42a48 100644 --- a/lib/MC/MCSection.cpp +++ b/lib/MC/MCSection.cpp @@ -20,7 +20,8 @@ using namespace llvm; //===----------------------------------------------------------------------===// MCSection::MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin) - : Begin(Begin), HasInstructions(false), Variant(V), Kind(K) {} + : Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false), + IsRegistered(false), Variant(V), Kind(K) {} MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) { if (!End) diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index 4d6298c542e2..ce0b4f5fb411 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -94,7 +94,7 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, break; } assert(COMDATSymbol); - OS << *COMDATSymbol; + COMDATSymbol->print(OS, &MAI); } OS << '\n'; } diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index 3cd84538425d..b4448d79a2d5 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -64,8 +64,10 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, if (ShouldOmitSectionDirective(SectionName, MAI)) { OS << '\t' << getSectionName(); - if (Subsection) - OS << '\t' << *Subsection; + if (Subsection) { + OS << '\t'; + Subsection->print(OS, &MAI); + } OS << '\n'; return; } @@ -153,8 +155,11 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << '\n'; - if (Subsection) - OS << "\t.subsection\t" << *Subsection << '\n'; + if (Subsection) { + OS << "\t.subsection\t"; + Subsection->print(OS, &MAI); + OS << '\n'; + } } bool MCSectionELF::UseCodeAlign() const { diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 9e0cc6b534c1..011969a3da01 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -117,7 +117,7 @@ void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size, "SectionRelative value requires 4-bytes"); if (!IsSectionRelative) - EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size); + 
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 9e0cc6b534c1..011969a3da01 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -117,7 +117,7 @@ void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
          "SectionRelative value requires 4-bytes");

   if (!IsSectionRelative)
-    EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size);
+    EmitValueImpl(MCSymbolRefExpr::create(Sym, getContext()), Size);
   else
     EmitCOFFSecRel32(Sym);
 }
@@ -133,7 +133,7 @@ void MCStreamer::EmitGPRel32Value(const MCExpr *Value) {
 /// EmitFill - Emit NumBytes bytes worth of the value specified by
 /// FillValue.  This implements directives such as '.space'.
 void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) {
-  const MCExpr *E = MCConstantExpr::Create(FillValue, getContext());
+  const MCExpr *E = MCConstantExpr::create(FillValue, getContext());
   for (uint64_t i = 0, e = NumBytes; i != e; ++i)
     EmitValue(E, 1);
 }
@@ -391,11 +391,17 @@ void MCStreamer::EmitCFIWindowSave() {
 }

 void MCStreamer::EnsureValidWinFrameInfo() {
+  const MCAsmInfo *MAI = Context.getAsmInfo();
+  if (!MAI->usesWindowsCFI())
+    report_fatal_error(".seh_* directives are not supported on this target");
   if (!CurrentWinFrameInfo || CurrentWinFrameInfo->End)
     report_fatal_error("No open Win64 EH frame function!");
 }

 void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) {
+  const MCAsmInfo *MAI = Context.getAsmInfo();
+  if (!MAI->usesWindowsCFI())
+    report_fatal_error(".seh_* directives are not supported on this target");
   if (CurrentWinFrameInfo && !CurrentWinFrameInfo->End)
     report_fatal_error("Starting a function before ending the previous one!");

@@ -549,6 +555,9 @@ void MCStreamer::EmitWinCFIEndProlog() {
   CurrentWinFrameInfo->PrologEnd = Label;
 }

+void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
+}
+
 void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
 }

@@ -637,7 +646,7 @@ void MCStreamer::EndCOFFSymbolDef() {}
 void MCStreamer::EmitFileDirective(StringRef Filename) {}
 void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {}
 void MCStreamer::EmitCOFFSymbolType(int Type) {}
-void MCStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+void MCStreamer::emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) {}
 void MCStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                        unsigned ByteAlignment) {}
 void MCStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
@@ -669,9 +678,9 @@ void MCStreamer::SwitchSection(MCSection *Section, const MCExpr *Subsection) {
   MCSectionSubPair curSection = SectionStack.back().first;
   SectionStack.back().second = curSection;
   if (MCSectionSubPair(Section, Subsection) != curSection) {
+    ChangeSection(Section, Subsection);
     SectionStack.back().first = MCSectionSubPair(Section, Subsection);
     assert(!Section->hasEnded() && "Section already ended");
-    ChangeSection(Section, Subsection);
     MCSymbol *Sym = Section->getBeginSymbol();
     if (Sym && !Sym->isInSection())
       EmitLabel(Sym);
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index 6abdd3acbc5a..7954a02d83b2 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -81,6 +81,11 @@ FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef FS) {
   return FeatureBits;
 }

+FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) {
+  SubtargetFeatures Features;
+  FeatureBits = Features.ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
+  return FeatureBits;
+}

 MCSchedModel MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
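The new ApplyFeatureFlag routes one signed feature string ("+feat" or "-feat") through SubtargetFeatures and stores the recomputed bits; the implied-feature propagation lives in SubtargetFeatures::ApplyFeatureFlag and is not shown in this hunk. A toy model of just the sign convention, with std::bitset standing in for FeatureBitset and a made-up two-entry feature table:

#include <bitset>
#include <cassert>
#include <string>

using FeatureBits = std::bitset<64>; // stand-in for llvm::FeatureBitset

// "+name" enables the bit, "-name" disables it; unknown names are ignored.
static FeatureBits applyFeatureFlag(FeatureBits Bits, const std::string &FS) {
  const bool Enable = FS.at(0) == '+';
  const std::string Name = FS.substr(1);
  const int Bit = (Name == "sse2") ? 0 : (Name == "avx") ? 1 : -1;
  if (Bit >= 0)
    Bits.set(Bit, Enable);
  return Bits;
}

int main() {
  FeatureBits B;
  B = applyFeatureFlag(B, "+sse2");
  B = applyFeatureFlag(B, "+avx");
  B = applyFeatureFlag(B, "-avx");
  assert(B.test(0) && !B.test(1));
  return 0;
}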
"llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; // Sentinel value for the absolute pseudo section. MCSection *MCSymbol::AbsolutePseudoSection = reinterpret_cast<MCSection *>(1); -static bool isAcceptableChar(char C) { - if ((C < 'a' || C > 'z') && - (C < 'A' || C > 'Z') && - (C < '0' || C > '9') && - C != '_' && C != '$' && C != '.' && C != '@') - return false; - return true; -} - -/// NameNeedsQuoting - Return true if the identifier \p Str needs quotes to be -/// syntactically correct. -static bool NameNeedsQuoting(StringRef Str) { - assert(!Str.empty() && "Cannot create an empty MCSymbol"); - - // If any of the characters in the string is an unacceptable character, force - // quotes. - for (unsigned i = 0, e = Str.size(); i != e; ++i) - if (!isAcceptableChar(Str[i])) - return true; - return false; -} - void MCSymbol::setVariableValue(const MCExpr *Value) { assert(!IsUsed && "Cannot set a variable that has already been used."); assert(Value && "Invalid variable value!"); this->Value = Value; - this->Section = nullptr; + SectionOrFragment = nullptr; } -void MCSymbol::print(raw_ostream &OS) const { +void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const { // The name for this MCSymbol is required to be a valid target name. However, // some targets support quoting names with funny characters. If the name // contains a funny character, then print it quoted. StringRef Name = getName(); - if (Name.empty()) { - OS << "\"\""; - return; - } - if (!NameNeedsQuoting(Name)) { + if (!MAI || MAI->isValidUnquotedName(Name)) { OS << Name; return; } + if (MAI && !MAI->supportsNameQuoting()) + report_fatal_error("Symbol name with unsupported characters"); + OS << '"'; - for (unsigned I = 0, E = Name.size(); I != E; ++I) { - char C = Name[I]; + for (char C : Name) { if (C == '\n') OS << "\\n"; else if (C == '"') diff --git a/lib/MC/MCSymbolELF.cpp b/lib/MC/MCSymbolELF.cpp new file mode 100644 index 000000000000..c3620651f883 --- /dev/null +++ b/lib/MC/MCSymbolELF.cpp @@ -0,0 +1,213 @@ +//===- lib/MC/MCSymbolELF.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/Support/ELF.h" + +namespace llvm { + +namespace { +enum { + // Shift value for STT_* flags. 7 possible values. 3 bits. + ELF_STT_Shift = 0, + + // Shift value for STB_* flags. 4 possible values, 2 bits. + ELF_STB_Shift = 3, + + // Shift value for STV_* flags. 4 possible values, 2 bits. + ELF_STV_Shift = 5, + + // Shift value for STO_* flags. 3 bits. All the values are between 0x20 and + // 0xe0, so we shift right by 5 before storing. + ELF_STO_Shift = 7, + + // One bit. + ELF_IsSignature_Shift = 10, + + // One bit. + ELF_WeakrefUsedInReloc_Shift = 11, + + // One bit. + ELF_UsedInReloc_Shift = 12, + + // One bit. 
diff --git a/lib/MC/MCSymbolELF.cpp b/lib/MC/MCSymbolELF.cpp
new file mode 100644
index 000000000000..c3620651f883
--- /dev/null
+++ b/lib/MC/MCSymbolELF.cpp
@@ -0,0 +1,213 @@
+//===- lib/MC/MCSymbolELF.cpp ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/Support/ELF.h"
+
+namespace llvm {
+
+namespace {
+enum {
+  // Shift value for STT_* flags. 7 possible values. 3 bits.
+  ELF_STT_Shift = 0,
+
+  // Shift value for STB_* flags. 4 possible values, 2 bits.
+  ELF_STB_Shift = 3,
+
+  // Shift value for STV_* flags. 4 possible values, 2 bits.
+  ELF_STV_Shift = 5,
+
+  // Shift value for STO_* flags. 3 bits. All the values are between 0x20 and
+  // 0xe0, so we shift right by 5 before storing.
+  ELF_STO_Shift = 7,
+
+  // One bit.
+  ELF_IsSignature_Shift = 10,
+
+  // One bit.
+  ELF_WeakrefUsedInReloc_Shift = 11,
+
+  // One bit.
+  ELF_UsedInReloc_Shift = 12,
+
+  // One bit.
+  ELF_BindingSet_Shift = 13
+};
+}
+
+void MCSymbolELF::setBinding(unsigned Binding) const {
+  setIsBindingSet();
+  unsigned Val;
+  switch (Binding) {
+  default:
+    llvm_unreachable("Unsupported Binding");
+  case ELF::STB_LOCAL:
+    Val = 0;
+    break;
+  case ELF::STB_GLOBAL:
+    Val = 1;
+    break;
+  case ELF::STB_WEAK:
+    Val = 2;
+    break;
+  case ELF::STB_GNU_UNIQUE:
+    Val = 3;
+    break;
+  }
+  uint32_t OtherFlags = getFlags() & ~(0x3 << ELF_STB_Shift);
+  setFlags(OtherFlags | (Val << ELF_STB_Shift));
+}
+
+unsigned MCSymbolELF::getBinding() const {
+  if (isBindingSet()) {
+    uint32_t Val = (getFlags() & (0x3 << ELF_STB_Shift)) >> ELF_STB_Shift;
+    switch (Val) {
+    default:
+      llvm_unreachable("Invalid value");
+    case 0:
+      return ELF::STB_LOCAL;
+    case 1:
+      return ELF::STB_GLOBAL;
+    case 2:
+      return ELF::STB_WEAK;
+    case 3:
+      return ELF::STB_GNU_UNIQUE;
+    }
+  }
+
+  if (isDefined())
+    return ELF::STB_LOCAL;
+  if (isUsedInReloc())
+    return ELF::STB_GLOBAL;
+  if (isWeakrefUsedInReloc())
+    return ELF::STB_WEAK;
+  if (isSignature())
+    return ELF::STB_LOCAL;
+  return ELF::STB_GLOBAL;
+}
+
+void MCSymbolELF::setType(unsigned Type) const {
+  unsigned Val;
+  switch (Type) {
+  default:
+    llvm_unreachable("Unsupported Binding");
+  case ELF::STT_NOTYPE:
+    Val = 0;
+    break;
+  case ELF::STT_OBJECT:
+    Val = 1;
+    break;
+  case ELF::STT_FUNC:
+    Val = 2;
+    break;
+  case ELF::STT_SECTION:
+    Val = 3;
+    break;
+  case ELF::STT_COMMON:
+    Val = 4;
+    break;
+  case ELF::STT_TLS:
+    Val = 5;
+    break;
+  case ELF::STT_GNU_IFUNC:
+    Val = 6;
+    break;
+  }
+  uint32_t OtherFlags = getFlags() & ~(0x7 << ELF_STT_Shift);
+  setFlags(OtherFlags | (Val << ELF_STT_Shift));
+}
+
+unsigned MCSymbolELF::getType() const {
+  uint32_t Val = (getFlags() & (0x7 << ELF_STT_Shift)) >> ELF_STT_Shift;
+  switch (Val) {
+  default:
+    llvm_unreachable("Invalid value");
+  case 0:
+    return ELF::STT_NOTYPE;
+  case 1:
+    return ELF::STT_OBJECT;
+  case 2:
+    return ELF::STT_FUNC;
+  case 3:
+    return ELF::STT_SECTION;
+  case 4:
+    return ELF::STT_COMMON;
+  case 5:
+    return ELF::STT_TLS;
+  case 6:
+    return ELF::STT_GNU_IFUNC;
+  }
+}
+
+void MCSymbolELF::setVisibility(unsigned Visibility) {
+  assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+         Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+
+  uint32_t OtherFlags = getFlags() & ~(0x3 << ELF_STV_Shift);
+  setFlags(OtherFlags | (Visibility << ELF_STV_Shift));
+}
+
+unsigned MCSymbolELF::getVisibility() const {
+  unsigned Visibility = (getFlags() & (0x3 << ELF_STV_Shift)) >> ELF_STV_Shift;
+  assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+         Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+  return Visibility;
+}
+
+void MCSymbolELF::setOther(unsigned Other) {
+  assert((Other & 0x1f) == 0);
+  Other >>= 5;
+  assert(Other <= 0x7);
+  uint32_t OtherFlags = getFlags() & ~(0x7 << ELF_STO_Shift);
+  setFlags(OtherFlags | (Other << ELF_STO_Shift));
+}
+
+unsigned MCSymbolELF::getOther() const {
+  unsigned Other = (getFlags() & (0x7 << ELF_STO_Shift)) >> ELF_STO_Shift;
+  return Other << 5;
+}
+
+void MCSymbolELF::setUsedInReloc() const {
+  uint32_t OtherFlags = getFlags() & ~(0x1 << ELF_UsedInReloc_Shift);
+  setFlags(OtherFlags | (1 << ELF_UsedInReloc_Shift));
+}
+
+bool MCSymbolELF::isUsedInReloc() const {
+  return getFlags() & (0x1 << ELF_UsedInReloc_Shift);
+}
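The shifts defined at the top of this new file pack STT (3 bits), STB (2), STV (2), a right-shifted STO (3) and four booleans into one 32-bit flags word. A compilable check of the layout arithmetic, with the shift values copied from the enum above; note that the stored STB/STT values are the small encoded codes chosen by setBinding/setType, not the raw ELF constants:

#include <cassert>
#include <cstdint>

enum {
  ELF_STT_Shift = 0, // 3 bits
  ELF_STB_Shift = 3, // 2 bits
  ELF_STV_Shift = 5, // 2 bits
  ELF_STO_Shift = 7, // 3 bits, value pre-shifted right by 5
};

int main() {
  uint32_t Flags = 0;
  // setType(ELF::STT_FUNC): encoded value 2 into bits [2:0].
  Flags = (Flags & ~(0x7u << ELF_STT_Shift)) | (2u << ELF_STT_Shift);
  // setBinding(ELF::STB_WEAK): encoded value 2 into bits [4:3].
  Flags = (Flags & ~(0x3u << ELF_STB_Shift)) | (2u << ELF_STB_Shift);
  // setOther(0x40): stored as 0x40 >> 5 == 2 into bits [9:7].
  Flags = (Flags & ~(0x7u << ELF_STO_Shift)) | ((0x40u >> 5) << ELF_STO_Shift);

  assert(((Flags >> ELF_STT_Shift) & 0x7) == 2);            // getType
  assert(((Flags >> ELF_STB_Shift) & 0x3) == 2);            // getBinding
  assert((((Flags >> ELF_STO_Shift) & 0x7) << 5) == 0x40);  // getOther
  return 0;
}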
+
+void MCSymbolELF::setIsWeakrefUsedInReloc() const {
+  uint32_t OtherFlags = getFlags() & ~(0x1 << ELF_WeakrefUsedInReloc_Shift);
+  setFlags(OtherFlags | (1 << ELF_WeakrefUsedInReloc_Shift));
+}
+
+bool MCSymbolELF::isWeakrefUsedInReloc() const {
+  return getFlags() & (0x1 << ELF_WeakrefUsedInReloc_Shift);
+}
+
+void MCSymbolELF::setIsSignature() const {
+  uint32_t OtherFlags = getFlags() & ~(0x1 << ELF_IsSignature_Shift);
+  setFlags(OtherFlags | (1 << ELF_IsSignature_Shift));
+}
+
+bool MCSymbolELF::isSignature() const {
+  return getFlags() & (0x1 << ELF_IsSignature_Shift);
+}
+
+void MCSymbolELF::setIsBindingSet() const {
+  uint32_t OtherFlags = getFlags() & ~(0x1 << ELF_BindingSet_Shift);
+  setFlags(OtherFlags | (1 << ELF_BindingSet_Shift));
+}
+
+bool MCSymbolELF::isBindingSet() const {
+  return getFlags() & (0x1 << ELF_BindingSet_Shift);
+}
+}
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index f87ea67ee460..1b73b7afb6a0 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -51,8 +51,8 @@ static void EmitAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
                               const MCSymbol *RHS) {
   MCContext &Context = Streamer.getContext();
   const MCExpr *Diff =
-      MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LHS, Context),
-                              MCSymbolRefExpr::Create(RHS, Context), Context);
+      MCBinaryExpr::createSub(MCSymbolRefExpr::create(LHS, Context),
+                              MCSymbolRefExpr::create(RHS, Context), Context);
   Streamer.EmitValue(Diff, 1);
 }

@@ -126,13 +126,13 @@ static void EmitSymbolRefWithOfs(MCStreamer &streamer,
                                  const MCSymbol *Base,
                                  const MCSymbol *Other) {
   MCContext &Context = streamer.getContext();
-  const MCSymbolRefExpr *BaseRef = MCSymbolRefExpr::Create(Base, Context);
-  const MCSymbolRefExpr *OtherRef = MCSymbolRefExpr::Create(Other, Context);
-  const MCExpr *Ofs = MCBinaryExpr::CreateSub(OtherRef, BaseRef, Context);
-  const MCSymbolRefExpr *BaseRefRel = MCSymbolRefExpr::Create(Base,
+  const MCSymbolRefExpr *BaseRef = MCSymbolRefExpr::create(Base, Context);
+  const MCSymbolRefExpr *OtherRef = MCSymbolRefExpr::create(Other, Context);
+  const MCExpr *Ofs = MCBinaryExpr::createSub(OtherRef, BaseRef, Context);
+  const MCSymbolRefExpr *BaseRefRel = MCSymbolRefExpr::create(Base,
                                               MCSymbolRefExpr::VK_COFF_IMGREL32,
                                               Context);
-  streamer.EmitValue(MCBinaryExpr::CreateAdd(BaseRefRel, Ofs, Context), 4);
+  streamer.EmitValue(MCBinaryExpr::createAdd(BaseRefRel, Ofs, Context), 4);
 }

 static void EmitRuntimeFunction(MCStreamer &streamer,
@@ -142,7 +142,7 @@ static void EmitRuntimeFunction(MCStreamer &streamer,
   streamer.EmitValueToAlignment(4);
   EmitSymbolRefWithOfs(streamer, info->Function, info->Begin);
   EmitSymbolRefWithOfs(streamer, info->Function, info->End);
-  streamer.EmitValue(MCSymbolRefExpr::Create(info->Symbol,
+  streamer.EmitValue(MCSymbolRefExpr::create(info->Symbol,
                                              MCSymbolRefExpr::VK_COFF_IMGREL32,
                                              context), 4);
 }
@@ -207,7 +207,7 @@ static void EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
     EmitRuntimeFunction(streamer, info->ChainedParent);
   else if (flags &
            ((Win64EH::UNW_TerminateHandler|Win64EH::UNW_ExceptionHandler) << 3))
-    streamer.EmitValue(MCSymbolRefExpr::Create(info->ExceptionHandler,
+    streamer.EmitValue(MCSymbolRefExpr::create(info->ExceptionHandler,
                                                MCSymbolRefExpr::VK_COFF_IMGREL32,
                                                context), 4);
   else if (numCodes == 0) {
"llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolMachO.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -46,7 +45,7 @@ bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) { // References to weak definitions require external relocation entries; the // definition may not always be the one in the same object file. - if (S.getData().getFlags() & SF_WeakDefinition) + if (cast<MCSymbolMachO>(S).isWeakDefinition()) return true; // Otherwise, we can use an internal relocation. @@ -81,7 +80,7 @@ uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S, MCValue Target; - if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout, nullptr)) + if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr)) report_fatal_error("unable to evaluate offset for variable '" + S.getName() + "'"); @@ -101,7 +100,7 @@ uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S, return Address; } - return getSectionAddress(S.getData().getFragment()->getParent()) + + return getSectionAddress(S.getFragment()->getParent()) + Layout.getSymbolOffset(S); } @@ -118,7 +117,7 @@ uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec, return OffsetToAlignment(EndAddr, NextSec.getAlignment()); } -void MachObjectWriter::WriteHeader(unsigned NumLoadCommands, +void MachObjectWriter::writeHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, bool SubsectionsViaSymbols) { uint32_t Flags = 0; @@ -132,27 +131,27 @@ void MachObjectWriter::WriteHeader(unsigned NumLoadCommands, uint64_t Start = OS.tell(); (void) Start; - Write32(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC); + write32(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC); - Write32(TargetObjectWriter->getCPUType()); - Write32(TargetObjectWriter->getCPUSubtype()); + write32(TargetObjectWriter->getCPUType()); + write32(TargetObjectWriter->getCPUSubtype()); - Write32(MachO::MH_OBJECT); - Write32(NumLoadCommands); - Write32(LoadCommandsSize); - Write32(Flags); + write32(MachO::MH_OBJECT); + write32(NumLoadCommands); + write32(LoadCommandsSize); + write32(Flags); if (is64Bit()) - Write32(0); // reserved + write32(0); // reserved assert(OS.tell() - Start == (is64Bit()?sizeof(MachO::mach_header_64): sizeof(MachO::mach_header))); } -/// WriteSegmentLoadCommand - Write a segment load command. +/// writeSegmentLoadCommand - Write a segment load command. /// /// \param NumSections The number of sections in this segment. /// \param SectionDataSize The total size of the sections. -void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections, +void MachObjectWriter::writeSegmentLoadCommand(unsigned NumSections, uint64_t VMSize, uint64_t SectionDataStartOffset, uint64_t SectionDataSize) { @@ -165,34 +164,34 @@ void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections, unsigned SegmentLoadCommandSize = is64Bit() ? sizeof(MachO::segment_command_64): sizeof(MachO::segment_command); - Write32(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT); - Write32(SegmentLoadCommandSize + + write32(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT); + write32(SegmentLoadCommandSize + NumSections * (is64Bit() ? 
sizeof(MachO::section_64) : sizeof(MachO::section))); - WriteBytes("", 16); + writeBytes("", 16); if (is64Bit()) { - Write64(0); // vmaddr - Write64(VMSize); // vmsize - Write64(SectionDataStartOffset); // file offset - Write64(SectionDataSize); // file size + write64(0); // vmaddr + write64(VMSize); // vmsize + write64(SectionDataStartOffset); // file offset + write64(SectionDataSize); // file size } else { - Write32(0); // vmaddr - Write32(VMSize); // vmsize - Write32(SectionDataStartOffset); // file offset - Write32(SectionDataSize); // file size + write32(0); // vmaddr + write32(VMSize); // vmsize + write32(SectionDataStartOffset); // file offset + write32(SectionDataSize); // file size } // maxprot - Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); + write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); // initprot - Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); - Write32(NumSections); - Write32(0); // flags + write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); + write32(NumSections); + write32(0); // flags assert(OS.tell() - Start == SegmentLoadCommandSize); } -void MachObjectWriter::WriteSection(const MCAssembler &Asm, +void MachObjectWriter::writeSection(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCSection &Sec, uint64_t FileOffset, uint64_t RelocationsStart, @@ -212,36 +211,36 @@ void MachObjectWriter::WriteSection(const MCAssembler &Asm, uint64_t Start = OS.tell(); (void) Start; - WriteBytes(Section.getSectionName(), 16); - WriteBytes(Section.getSegmentName(), 16); + writeBytes(Section.getSectionName(), 16); + writeBytes(Section.getSegmentName(), 16); if (is64Bit()) { - Write64(getSectionAddress(&Sec)); // address - Write64(SectionSize); // size + write64(getSectionAddress(&Sec)); // address + write64(SectionSize); // size } else { - Write32(getSectionAddress(&Sec)); // address - Write32(SectionSize); // size + write32(getSectionAddress(&Sec)); // address + write32(SectionSize); // size } - Write32(FileOffset); + write32(FileOffset); unsigned Flags = Section.getTypeAndAttributes(); if (Section.hasInstructions()) Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS; assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!"); - Write32(Log2_32(Section.getAlignment())); - Write32(NumRelocations ? RelocationsStart : 0); - Write32(NumRelocations); - Write32(Flags); - Write32(IndirectSymBase.lookup(&Sec)); // reserved1 - Write32(Section.getStubSize()); // reserved2 + write32(Log2_32(Section.getAlignment())); + write32(NumRelocations ? RelocationsStart : 0); + write32(NumRelocations); + write32(Flags); + write32(IndirectSymBase.lookup(&Sec)); // reserved1 + write32(Section.getStubSize()); // reserved2 if (is64Bit()) - Write32(0); // reserved3 + write32(0); // reserved3 assert(OS.tell() - Start == (is64Bit() ? 
sizeof(MachO::section_64) : sizeof(MachO::section))); } -void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset, +void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, uint32_t StringTableOffset, uint32_t StringTableSize) { @@ -250,17 +249,17 @@ void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset, uint64_t Start = OS.tell(); (void) Start; - Write32(MachO::LC_SYMTAB); - Write32(sizeof(MachO::symtab_command)); - Write32(SymbolOffset); - Write32(NumSymbols); - Write32(StringTableOffset); - Write32(StringTableSize); + write32(MachO::LC_SYMTAB); + write32(sizeof(MachO::symtab_command)); + write32(SymbolOffset); + write32(NumSymbols); + write32(StringTableOffset); + write32(StringTableSize); assert(OS.tell() - Start == sizeof(MachO::symtab_command)); } -void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, +void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol, uint32_t NumLocalSymbols, uint32_t FirstExternalSymbol, uint32_t NumExternalSymbols, @@ -273,43 +272,37 @@ void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, uint64_t Start = OS.tell(); (void) Start; - Write32(MachO::LC_DYSYMTAB); - Write32(sizeof(MachO::dysymtab_command)); - Write32(FirstLocalSymbol); - Write32(NumLocalSymbols); - Write32(FirstExternalSymbol); - Write32(NumExternalSymbols); - Write32(FirstUndefinedSymbol); - Write32(NumUndefinedSymbols); - Write32(0); // tocoff - Write32(0); // ntoc - Write32(0); // modtaboff - Write32(0); // nmodtab - Write32(0); // extrefsymoff - Write32(0); // nextrefsyms - Write32(IndirectSymbolOffset); - Write32(NumIndirectSymbols); - Write32(0); // extreloff - Write32(0); // nextrel - Write32(0); // locreloff - Write32(0); // nlocrel + write32(MachO::LC_DYSYMTAB); + write32(sizeof(MachO::dysymtab_command)); + write32(FirstLocalSymbol); + write32(NumLocalSymbols); + write32(FirstExternalSymbol); + write32(NumExternalSymbols); + write32(FirstUndefinedSymbol); + write32(NumUndefinedSymbols); + write32(0); // tocoff + write32(0); // ntoc + write32(0); // modtaboff + write32(0); // nmodtab + write32(0); // extrefsymoff + write32(0); // nextrefsyms + write32(IndirectSymbolOffset); + write32(NumIndirectSymbols); + write32(0); // extreloff + write32(0); // nextrel + write32(0); // locreloff + write32(0); // nlocrel assert(OS.tell() - Start == sizeof(MachO::dysymtab_command)); } MachObjectWriter::MachSymbolData * MachObjectWriter::findSymbolData(const MCSymbol &Sym) { - for (auto &Entry : LocalSymbolData) - if (Entry.Symbol == &Sym) - return &Entry; - - for (auto &Entry : ExternalSymbolData) - if (Entry.Symbol == &Sym) - return &Entry; - - for (auto &Entry : UndefinedSymbolData) - if (Entry.Symbol == &Sym) - return &Entry; + for (auto *SymbolData : + {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) + for (MachSymbolData &Entry : *SymbolData) + if (Entry.Symbol == &Sym) + return &Entry; return nullptr; } @@ -326,14 +319,13 @@ const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const { return *S; } -void MachObjectWriter::WriteNlist(MachSymbolData &MSD, +void MachObjectWriter::writeNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) { const MCSymbol *Symbol = MSD.Symbol; - MCSymbolData &Data = Symbol->getData(); + const MCSymbol &Data = *Symbol; const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol); uint8_t SectionIndex = MSD.SectionIndex; uint8_t Type = 0; - uint16_t Flags = Data.getFlags(); uint64_t Address = 0; bool IsAlias = Symbol != 
AliasedSymbol; @@ -373,52 +365,37 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD, Address = AliaseeInfo->StringIndex; else if (Symbol->isDefined()) Address = getSymbolAddress(OrigSymbol, Layout); - else if (Data.isCommon()) { + else if (Symbol->isCommon()) { // Common symbols are encoded with the size in the address // field, and their alignment in the flags. - Address = Data.getCommonSize(); - - // Common alignment is packed into the 'desc' bits. - if (unsigned Align = Data.getCommonAlignment()) { - unsigned Log2Size = Log2_32(Align); - assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); - if (Log2Size > 15) - report_fatal_error("invalid 'common' alignment '" + - Twine(Align) + "' for '" + Symbol->getName() + "'", - false); - // FIXME: Keep this mask with the SymbolFlags enumeration. - Flags = (Flags & 0xF0FF) | (Log2Size << 8); - } + Address = Symbol->getCommonSize(); } - if (Layout.getAssembler().isThumbFunc(Symbol)) - Flags |= SF_ThumbFunc; - // struct nlist (12 bytes) - Write32(MSD.StringIndex); - Write8(Type); - Write8(SectionIndex); + write32(MSD.StringIndex); + write8(Type); + write8(SectionIndex); // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' // value. - Write16(Flags); + write16(cast<MCSymbolMachO>(Symbol)->getEncodedFlags()); if (is64Bit()) - Write64(Address); + write64(Address); else - Write32(Address); + write32(Address); } -void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type, +void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type, uint32_t DataOffset, uint32_t DataSize) { uint64_t Start = OS.tell(); (void) Start; - Write32(Type); - Write32(sizeof(MachO::linkedit_data_command)); - Write32(DataOffset); - Write32(DataSize); + write32(Type); + write32(sizeof(MachO::linkedit_data_command)); + write32(DataOffset); + write32(DataSize); assert(OS.tell() - Start == sizeof(MachO::linkedit_data_command)); } @@ -427,45 +404,44 @@ static unsigned ComputeLinkerOptionsLoadCommandSize( const std::vector<std::string> &Options, bool is64Bit) { unsigned Size = sizeof(MachO::linker_option_command); - for (unsigned i = 0, e = Options.size(); i != e; ++i) - Size += Options[i].size() + 1; + for (const std::string &Option : Options) + Size += Option.size() + 1; return RoundUpToAlignment(Size, is64Bit ? 8 : 4); } -void MachObjectWriter::WriteLinkerOptionsLoadCommand( +void MachObjectWriter::writeLinkerOptionsLoadCommand( const std::vector<std::string> &Options) { unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit()); uint64_t Start = OS.tell(); (void) Start; - Write32(MachO::LC_LINKER_OPTION); - Write32(Size); - Write32(Options.size()); + write32(MachO::LC_LINKER_OPTION); + write32(Size); + write32(Options.size()); uint64_t BytesWritten = sizeof(MachO::linker_option_command); - for (unsigned i = 0, e = Options.size(); i != e; ++i) { + for (const std::string &Option : Options) { // Write each string, including the null byte. - const std::string &Option = Options[i]; - WriteBytes(Option.c_str(), Option.size() + 1); + writeBytes(Option.c_str(), Option.size() + 1); BytesWritten += Option.size() + 1; } // Pad to a multiple of the pointer size. - WriteBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4)); + writeBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 
8 : 4)); assert(OS.tell() - Start == Size); } -void MachObjectWriter::RecordRelocation(MCAssembler &Asm, +void MachObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) { - TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup, + TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup, Target, FixedValue); } -void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { +void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) { // This is the point where 'as' creates actual symbols for indirect symbols // (in the following two passes). It would be easier for us to do this sooner // when we see the attribute, but that makes getting the order in the symbol @@ -500,7 +476,7 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { // Initialize the section indirect symbol base, if necessary. IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex)); - Asm.getOrCreateSymbolData(*it->Symbol); + Asm.registerSymbol(*it->Symbol); } // Then lazy symbol pointers and symbol stubs. @@ -520,14 +496,14 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { // // FIXME: Do not hardcode. bool Created; - MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); + Asm.registerSymbol(*it->Symbol, &Created); if (Created) - Entry.setFlags(Entry.getFlags() | 0x0001); + cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true); } } -/// ComputeSymbolTable - Compute the symbol table data -void MachObjectWriter::ComputeSymbolTable( +/// computeSymbolTable - Compute the symbol table data +void MachObjectWriter::computeSymbolTable( MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData, std::vector<MachSymbolData> &ExternalSymbolData, std::vector<MachSymbolData> &UndefinedSymbolData) { @@ -554,13 +530,11 @@ void MachObjectWriter::ComputeSymbolTable( // match 'as'. Even though it doesn't matter for correctness, this is // important for letting us diff .o files. for (const MCSymbol &Symbol : Asm.symbols()) { - MCSymbolData &SD = Symbol.getData(); - // Ignore non-linker visible symbols. if (!Asm.isSymbolLinkerVisible(Symbol)) continue; - if (!SD.isExternal() && !Symbol.isUndefined()) + if (!Symbol.isExternal() && !Symbol.isUndefined()) continue; MachSymbolData MSD; @@ -582,13 +556,11 @@ void MachObjectWriter::ComputeSymbolTable( // Now add the data for local symbols. for (const MCSymbol &Symbol : Asm.symbols()) { - MCSymbolData &SD = Symbol.getData(); - // Ignore non-linker visible symbols. if (!Asm.isSymbolLinkerVisible(Symbol)) continue; - if (SD.isExternal() || Symbol.isUndefined()) + if (Symbol.isExternal() || Symbol.isUndefined()) continue; MachSymbolData MSD; @@ -611,16 +583,13 @@ void MachObjectWriter::ComputeSymbolTable( // Set the symbol indices. 
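The hunk below collapses three identical index-assignment loops into one range-for over an initializer_list of vector pointers. A minimal, self-contained sketch of that C++11 idiom, with illustrative names rather than the patch's own types:

#include <initializer_list>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Local{"a", "b"}, External{"c"}, Undefined{"d"};
  unsigned Index = 0;
  // The braced list deduces to std::initializer_list<std::vector<std::string> *>,
  // so a single loop visits all three vectors in a fixed order.
  for (auto *Group : {&Local, &External, &Undefined})
    for (std::string &Sym : *Group)
      std::cout << Index++ << ": " << Sym << "\n";
}

The same rewrite is applied to findSymbolData and the nlist-writing loop elsewhere in this file, so the traversal order stays local, external, undefined in every case.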
Index = 0; - for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) - LocalSymbolData[i].Symbol->setIndex(Index++); - for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) - ExternalSymbolData[i].Symbol->setIndex(Index++); - for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) - UndefinedSymbolData[i].Symbol->setIndex(Index++); + for (auto *SymbolData : + {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) + for (MachSymbolData &Entry : *SymbolData) + Entry.Symbol->setIndex(Index++); for (const MCSection &Section : Asm) { - std::vector<RelAndSymbol> &Relocs = Relocations[&Section]; - for (RelAndSymbol &Rel : Relocs) { + for (RelAndSymbol &Rel : Relocations[&Section]) { if (!Rel.Sym) continue; @@ -638,9 +607,7 @@ void MachObjectWriter::ComputeSymbolTable( void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm, const MCAsmLayout &Layout) { uint64_t StartAddress = 0; - const SmallVectorImpl<MCSection *> &Order = Layout.getSectionOrder(); - for (int i = 0, n = Order.size(); i != n ; ++i) { - const MCSection *Sec = Order[i]; + for (const MCSection *Sec : Layout.getSectionOrder()) { StartAddress = RoundUpToAlignment(StartAddress, Sec->getAlignment()); SectionAddress[Sec] = StartAddress; StartAddress += Layout.getSectionAddressSize(Sec); @@ -652,15 +619,15 @@ void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm, } } -void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, +void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { computeSectionAddresses(Asm, Layout); // Create symbol data for any indirect symbols. - BindIndirectSymbols(Asm); + bindIndirectSymbols(Asm); } -bool MachObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( +bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, bool InSet, bool IsPCRel) const { if (InSet) @@ -692,8 +659,7 @@ bool MachObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( bool hasReliableSymbolDifference = isX86_64(); if (!hasReliableSymbolDifference) { if (!SA.isInSection() || &SecA != &SecB || - (!SA.isTemporary() && - FB.getAtom() != Asm.getSymbolData(SA).getFragment()->getAtom() && + (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() && Asm.getSubsectionsViaSymbols())) return false; return true; @@ -708,16 +674,13 @@ bool MachObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( SA.isTemporary() && SA.isInSection() && &SecA == &SecB){ return true; } - } else { - if (!TargetObjectWriter->useAggressiveSymbolFolding()) - return false; } // If they are not in the same section, we can't compute the diff. if (&SecA != &SecB) return false; - const MCFragment *FA = Asm.getSymbolData(SA).getFragment(); + const MCFragment *FA = SA.getFragment(); // Bail if the symbol has no fragment. if (!FA) @@ -731,10 +694,10 @@ bool MachObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( return false; } -void MachObjectWriter::WriteObject(MCAssembler &Asm, +void MachObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { // Compute symbol table information and bind symbol indices. - ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, + computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, UndefinedSymbolData); unsigned NumSections = Asm.size(); @@ -779,12 +742,9 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, } // Add the linker option load commands sizes. 
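Each LC_LINKER_OPTION command counted in the loop below is sized by ComputeLinkerOptionsLoadCommandSize above: a fixed 12-byte header (cmd, cmdsize, count) plus every option string with its NUL terminator, rounded up to the target pointer size. A worked sketch of that arithmetic, independent of the LLVM types:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Mirrors MachO::linker_option_command: three uint32_t fields, 12 bytes.
struct LinkerOptionCommand { uint32_t cmd, cmdsize, count; };

static uint64_t roundUp(uint64_t V, uint64_t A) { return (V + A - 1) / A * A; }

int main() {
  std::vector<std::string> Options{"-lz", "-framework"};
  uint64_t Size = sizeof(LinkerOptionCommand); // 12-byte fixed header
  for (const std::string &O : Options)
    Size += O.size() + 1;                      // strings are NUL-terminated
  // 12 + 4 + 11 = 27, padded to 8 bytes on a 64-bit target.
  std::cout << roundUp(Size, 8) << "\n";       // prints 32
}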
- const std::vector<std::vector<std::string> > &LinkerOptions = - Asm.getLinkerOptions(); - for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) { + for (const auto &Option : Asm.getLinkerOptions()) { ++NumLoadCommands; - LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(LinkerOptions[i], - is64Bit()); + LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit()); } // Compute the total size of the section data, as well as its file size and vm @@ -794,9 +754,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, uint64_t SectionDataSize = 0; uint64_t SectionDataFileSize = 0; uint64_t VMSize = 0; - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - const MCSection &Sec = *it; + for (const MCSection &Sec : Asm) { uint64_t Address = getSectionAddress(&Sec); uint64_t Size = Layout.getSectionAddressSize(&Sec); uint64_t FileSize = Layout.getSectionFileSize(&Sec); @@ -804,7 +762,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, VMSize = std::max(VMSize, Address + Size); - if (it->isVirtualSection()) + if (Sec.isVirtualSection()) continue; SectionDataSize = std::max(SectionDataSize, Address + Size); @@ -818,19 +776,18 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, SectionDataFileSize += SectionDataPadding; // Write the prolog, starting with the header and load command... - WriteHeader(NumLoadCommands, LoadCommandsSize, + writeHeader(NumLoadCommands, LoadCommandsSize, Asm.getSubsectionsViaSymbols()); - WriteSegmentLoadCommand(NumSections, VMSize, + writeSegmentLoadCommand(NumSections, VMSize, SectionDataStart, SectionDataSize); // ... and then the section headers. uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - std::vector<RelAndSymbol> &Relocs = Relocations[&*it]; + for (const MCSection &Sec : Asm) { + std::vector<RelAndSymbol> &Relocs = Relocations[&Sec]; unsigned NumRelocs = Relocs.size(); - uint64_t SectionStart = SectionDataStart + getSectionAddress(&*it); - WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); + uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec); + writeSection(Asm, Layout, Sec, SectionStart, RelocTableEnd, NumRelocs); RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info); } @@ -841,11 +798,11 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, assert(VersionInfo.Major < 65536 && "unencodable major target version"); uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) | (VersionInfo.Major << 16); - Write32(VersionInfo.Kind == MCVM_OSXVersionMin ? MachO::LC_VERSION_MIN_MACOSX : + write32(VersionInfo.Kind == MCVM_OSXVersionMin ? MachO::LC_VERSION_MIN_MACOSX : MachO::LC_VERSION_MIN_IPHONEOS); - Write32(sizeof(MachO::version_min_command)); - Write32(EncodedVersion); - Write32(0); // reserved. + write32(sizeof(MachO::version_min_command)); + write32(EncodedVersion); + write32(0); // reserved. } // Write the data-in-code load command, if used. @@ -853,14 +810,14 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, if (NumDataRegions) { uint64_t DataRegionsOffset = RelocTableEnd; uint64_t DataRegionsSize = NumDataRegions * 8; - WriteLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset, + writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset, DataRegionsSize); } // Write the loh load command, if used. 
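LC_DATA_IN_CODE above and LC_LINKER_OPTIMIZATION_HINT below both reuse the same 16-byte linkedit_data_command layout that writeLinkeditLoadCommand emits. A host-endian sketch of that record, for orientation only:

#include <cstdint>
#include <ostream>

// Mirrors MachO::linkedit_data_command (16 bytes).
struct LinkeditDataCommand {
  uint32_t cmd;      // e.g. LC_DATA_IN_CODE
  uint32_t cmdsize;  // always 16 for this command
  uint32_t dataoff;  // file offset of the payload
  uint32_t datasize; // payload size in bytes
};

void writeLinkeditCommand(std::ostream &OS, uint32_t Cmd, uint32_t Off,
                          uint32_t Size) {
  LinkeditDataCommand C{Cmd, sizeof(LinkeditDataCommand), Off, Size};
  OS.write(reinterpret_cast<const char *>(&C), sizeof(C)); // no byte-swapping
}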
uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize; if (LOHSize) - WriteLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT, + writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT, DataInCodeTableEnd, LOHSize); // Write the symbol table load command, if used. @@ -889,24 +846,21 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist)); - WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, + writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, StringTableOffset, StringTable.data().size()); - WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, + writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, FirstExternalSymbol, NumExternalSymbols, FirstUndefinedSymbol, NumUndefinedSymbols, IndirectSymbolOffset, NumIndirectSymbols); } // Write the linker options load commands. - for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) { - WriteLinkerOptionsLoadCommand(LinkerOptions[i]); - } + for (const auto &Option : Asm.getLinkerOptions()) + writeLinkerOptionsLoadCommand(Option); // Write the actual section data. - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - MCSection &Sec = *it; + for (const MCSection &Sec : Asm) { Asm.writeSectionData(&Sec, Layout); uint64_t Pad = getPaddingSize(&Sec, Layout); @@ -917,14 +871,13 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, WriteZeros(SectionDataPadding); // Write the relocation entries. - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { + for (const MCSection &Sec : Asm) { // Write the section relocation entries, in reverse order to match 'as' // (approximately, the exact algorithm is more complicated than this). - std::vector<RelAndSymbol> &Relocs = Relocations[&*it]; - for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { - Write32(Relocs[e - i - 1].MRE.r_word0); - Write32(Relocs[e - i - 1].MRE.r_word1); + std::vector<RelAndSymbol> &Relocs = Relocations[&Sec]; + for (const RelAndSymbol &Rel : make_range(Relocs.rbegin(), Relocs.rend())) { + write32(Rel.MRE.r_word0); + write32(Rel.MRE.r_word1); } } @@ -940,9 +893,9 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, << " end: " << End << "(" << Data->End->getName() << ")" << " size: " << End - Start << "\n"); - Write32(Start); - Write16(End - Start); - Write16(Data->Kind); + write32(Start); + write16(End - Start); + write16(Data->Kind); } // Write out the loh commands, if there is one. @@ -950,9 +903,9 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, #ifndef NDEBUG unsigned Start = OS.tell(); #endif - Asm.getLOHContainer().Emit(*this, Layout); + Asm.getLOHContainer().emit(*this, Layout); // Pad to a multiple of the pointer size. - WriteBytes("", OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4)); + writeBytes("", OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4)); assert(OS.tell() - Start == LOHSize); } @@ -968,28 +921,25 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, static_cast<const MCSectionMachO &>(*it->Section); if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) { // If this symbol is defined and internal, mark it as such. 
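An indirect symbol table entry is one 32-bit word per pointer slot: normally the symbol-table index, but for a symbol that is defined and internal the loader only needs the INDIRECT_SYMBOL_LOCAL marker, OR'd with INDIRECT_SYMBOL_ABS for absolute symbols, which is exactly the branch taken in the loop below. A small sketch of that encoding:

#include <cstdint>

// Sentinel values from <mach-o/loader.h>.
constexpr uint32_t IndirectSymbolLocal = 0x80000000;
constexpr uint32_t IndirectSymbolAbs   = 0x40000000;

// One entry for a non-lazy symbol pointer slot.
uint32_t indirectEntry(bool Defined, bool External, bool Absolute,
                       uint32_t SymbolIndex) {
  if (Defined && !External)
    return Absolute ? (IndirectSymbolLocal | IndirectSymbolAbs)
                    : IndirectSymbolLocal;
  return SymbolIndex;
}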
- if (it->Symbol->isDefined() && - !Asm.getSymbolData(*it->Symbol).isExternal()) { + if (it->Symbol->isDefined() && !it->Symbol->isExternal()) { uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL; if (it->Symbol->isAbsolute()) Flags |= MachO::INDIRECT_SYMBOL_ABS; - Write32(Flags); + write32(Flags); continue; } } - Write32(it->Symbol->getIndex()); + write32(it->Symbol->getIndex()); } // FIXME: Check that offsets match computed ones. // Write the symbol table entries. - for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) - WriteNlist(LocalSymbolData[i], Layout); - for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) - WriteNlist(ExternalSymbolData[i], Layout); - for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) - WriteNlist(UndefinedSymbolData[i], Layout); + for (auto *SymbolData : + {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) + for (MachSymbolData &Entry : *SymbolData) + writeNlist(Entry, Layout); // Write the string table. OS << StringTable.data(); diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index 78006e0d7020..76574e987cb1 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/SubtargetFeature.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -59,27 +60,6 @@ static void Split(std::vector<std::string> &V, StringRef S) { V.assign(Tmp.begin(), Tmp.end()); } -/// Join a vector of strings to a string with a comma separating each element. -/// -static std::string Join(const std::vector<std::string> &V) { - // Start with empty string. - std::string Result; - // If the vector is not empty - if (!V.empty()) { - // Start with the first feature - Result = V[0]; - // For each successive feature - for (size_t i = 1; i < V.size(); i++) { - // Add a comma - Result += ","; - // Add the feature - Result += V[i]; - } - } - // Return the features string - return Result; -} - /// Adding features. void SubtargetFeatures::AddFeature(StringRef String, bool Enable) { // Don't add empty features. @@ -144,7 +124,7 @@ SubtargetFeatures::SubtargetFeatures(StringRef Initial) { std::string SubtargetFeatures::getString() const { - return Join(Features); + return join(Features.begin(), Features.end(), ","); } /// SetImpliedBits - For each feature that is (transitively) implied by this @@ -210,6 +190,38 @@ SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature, return Bits; } +FeatureBitset +SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature, + ArrayRef<SubtargetFeatureKV> FeatureTable) { + + assert(hasFlag(Feature)); + + // Find feature in table. + const SubtargetFeatureKV *FeatureEntry = + Find(StripFlag(Feature), FeatureTable); + // If there is a match + if (FeatureEntry) { + // Enable/disable feature in bits + if (isEnabled(Feature)) { + Bits |= FeatureEntry->Value; + + // For each feature that this implies, set it. + SetImpliedBits(Bits, FeatureEntry, FeatureTable); + } else { + Bits &= ~FeatureEntry->Value; + + // For each feature that implies this, clear it. + ClearImpliedBits(Bits, FeatureEntry, FeatureTable); + } + } else { + errs() << "'" << Feature + << "' is not a recognized feature for this target" + << " (ignoring feature)\n"; + } + + return Bits; +} + /// getFeatureBits - Get feature bits a CPU. 
/// @@ -265,28 +277,7 @@ SubtargetFeatures::getFeatureBits(StringRef CPU, if (Feature == "+help") Help(CPUTable, FeatureTable); - // Find feature in table. - const SubtargetFeatureKV *FeatureEntry = - Find(StripFlag(Feature), FeatureTable); - // If there is a match - if (FeatureEntry) { - // Enable/disable feature in bits - if (isEnabled(Feature)) { - Bits |= FeatureEntry->Value; - - // For each feature that this implies, set it. - SetImpliedBits(Bits, FeatureEntry, FeatureTable); - } else { - Bits &= ~FeatureEntry->Value; - - // For each feature that implies this, clear it. - ClearImpliedBits(Bits, FeatureEntry, FeatureTable); - } - } else { - errs() << "'" << Feature - << "' is not a recognized feature for this target" - << " (ignoring feature)\n"; - } + Bits = ApplyFeatureFlag(Bits, Feature, FeatureTable); } return Bits; diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index c94508563e5f..423c7dce45da 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -21,10 +21,11 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolCOFF.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/COFF.h" @@ -76,6 +77,13 @@ public: void set_name_offset(uint32_t Offset); bool should_keep() const; + + int64_t getIndex() const { return Index; } + void setIndex(int Value) { + Index = Value; + if (MC) + MC->setIndex(static_cast<uint32_t>(Value)); + } }; // This class contains staging data for a COFF relocation entry. @@ -161,27 +169,27 @@ public: void WriteFileHeader(const COFF::header &Header); void WriteSymbol(const COFFSymbol &S); void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S); - void WriteSectionHeader(const COFF::section &S); + void writeSectionHeader(const COFF::section &S); void WriteRelocation(const COFF::relocation &R); // MCObjectWriter interface implementation. 
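The overrides renamed just below follow MCObjectWriter's per-object lifecycle: binding after layout, one relocation record per surviving fixup, then final serialization. A skeleton of that call order, with the real MCAssembler/MCAsmLayout signatures elided for brevity:

// Sketch only; the actual hooks take assembler and layout arguments.
struct ObjectWriterLifecycle {
  virtual ~ObjectWriterLifecycle() = default;
  virtual void executePostLayoutBinding() = 0; // 1. resolve symbols/sections
  virtual void recordRelocation() = 0;         // 2. once per fixup
  virtual void writeObject() = 0;              // 3. emit headers + payload
};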
- void ExecutePostLayoutBinding(MCAssembler &Asm, + void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; - bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, + bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, bool InSet, bool IsPCRel) const override; bool isWeak(const MCSymbol &Sym) const override; - void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, + void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) override; - void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; }; } @@ -219,6 +227,10 @@ bool COFFSymbol::should_keep() const { return true; } + // if this is a safeseh handler, keep it + if (MC && (cast<MCSymbolCOFF>(MC)->isSafeSEH())) + return true; + // if the section its in is being droped, drop it if (Section->Number == -1) return false; @@ -364,9 +376,8 @@ void WinCOFFObjectWriter::defineSection(MCSectionCOFF const &Sec) { static uint64_t getSymbolValue(const MCSymbol &Symbol, const MCAsmLayout &Layout) { - const MCSymbolData &Data = Symbol.getData(); - if (Data.isCommon() && Data.isExternal()) - return Data.getCommonSize(); + if (Symbol.isCommon() && Symbol.isExternal()) + return Symbol.getCommonSize(); uint64_t Res; if (!Layout.getSymbolOffset(Symbol, Res)) @@ -383,7 +394,7 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &Symbol, COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol); SymbolMap[&Symbol] = coff_symbol; - if (Symbol.getData().getFlags() & COFF::SF_WeakExternal) { + if (cast<MCSymbolCOFF>(Symbol).isWeakExternal()) { coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL; if (Symbol.isVariable()) { @@ -414,17 +425,17 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &Symbol, coff_symbol->MC = &Symbol; } else { - const MCSymbolData &ResSymData = Assembler.getSymbolData(Symbol); const MCSymbol *Base = Layout.getBaseSymbol(Symbol); coff_symbol->Data.Value = getSymbolValue(Symbol, Layout); - coff_symbol->Data.Type = (ResSymData.getFlags() & 0x0000FFFF) >> 0; - coff_symbol->Data.StorageClass = (ResSymData.getFlags() & 0x00FF0000) >> 16; + const MCSymbolCOFF &SymbolCOFF = cast<MCSymbolCOFF>(Symbol); + coff_symbol->Data.Type = SymbolCOFF.getType(); + coff_symbol->Data.StorageClass = SymbolCOFF.getClass(); // If no storage class was specified in the streamer, define it here. - if (coff_symbol->Data.StorageClass == 0) { - bool IsExternal = ResSymData.isExternal() || - (!ResSymData.getFragment() && !Symbol.isVariable()); + if (coff_symbol->Data.StorageClass == COFF::IMAGE_SYM_CLASS_NULL) { + bool IsExternal = Symbol.isExternal() || + (!Symbol.getFragment() && !Symbol.isVariable()); coff_symbol->Data.StorageClass = IsExternal ? 
COFF::IMAGE_SYM_CLASS_EXTERNAL @@ -434,9 +445,8 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &Symbol, if (!Base) { coff_symbol->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE; } else { - const MCSymbolData &BaseData = Assembler.getSymbolData(*Base); - if (BaseData.getFragment()) { - COFFSection *Sec = SectionMap[BaseData.getFragment()->getParent()]; + if (Base->getFragment()) { + COFFSection *Sec = SectionMap[Base->getFragment()->getParent()]; if (coff_symbol->Section && coff_symbol->Section != Sec) report_fatal_error("conflicting sections for symbol"); @@ -535,40 +545,40 @@ bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) { void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) { if (UseBigObj) { - WriteLE16(COFF::IMAGE_FILE_MACHINE_UNKNOWN); - WriteLE16(0xFFFF); - WriteLE16(COFF::BigObjHeader::MinBigObjectVersion); - WriteLE16(Header.Machine); - WriteLE32(Header.TimeDateStamp); - WriteBytes(StringRef(COFF::BigObjMagic, sizeof(COFF::BigObjMagic))); - WriteLE32(0); - WriteLE32(0); - WriteLE32(0); - WriteLE32(0); - WriteLE32(Header.NumberOfSections); - WriteLE32(Header.PointerToSymbolTable); - WriteLE32(Header.NumberOfSymbols); + writeLE16(COFF::IMAGE_FILE_MACHINE_UNKNOWN); + writeLE16(0xFFFF); + writeLE16(COFF::BigObjHeader::MinBigObjectVersion); + writeLE16(Header.Machine); + writeLE32(Header.TimeDateStamp); + writeBytes(StringRef(COFF::BigObjMagic, sizeof(COFF::BigObjMagic))); + writeLE32(0); + writeLE32(0); + writeLE32(0); + writeLE32(0); + writeLE32(Header.NumberOfSections); + writeLE32(Header.PointerToSymbolTable); + writeLE32(Header.NumberOfSymbols); } else { - WriteLE16(Header.Machine); - WriteLE16(static_cast<int16_t>(Header.NumberOfSections)); - WriteLE32(Header.TimeDateStamp); - WriteLE32(Header.PointerToSymbolTable); - WriteLE32(Header.NumberOfSymbols); - WriteLE16(Header.SizeOfOptionalHeader); - WriteLE16(Header.Characteristics); + writeLE16(Header.Machine); + writeLE16(static_cast<int16_t>(Header.NumberOfSections)); + writeLE32(Header.TimeDateStamp); + writeLE32(Header.PointerToSymbolTable); + writeLE32(Header.NumberOfSymbols); + writeLE16(Header.SizeOfOptionalHeader); + writeLE16(Header.Characteristics); } } void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol &S) { - WriteBytes(StringRef(S.Data.Name, COFF::NameSize)); - WriteLE32(S.Data.Value); + writeBytes(StringRef(S.Data.Name, COFF::NameSize)); + writeLE32(S.Data.Value); if (UseBigObj) - WriteLE32(S.Data.SectionNumber); + writeLE32(S.Data.SectionNumber); else - WriteLE16(static_cast<int16_t>(S.Data.SectionNumber)); - WriteLE16(S.Data.Type); - Write8(S.Data.StorageClass); - Write8(S.Data.NumberOfAuxSymbols); + writeLE16(static_cast<int16_t>(S.Data.SectionNumber)); + writeLE16(S.Data.Type); + write8(S.Data.StorageClass); + write8(S.Data.NumberOfAuxSymbols); WriteAuxiliarySymbols(S.Aux); } @@ -578,44 +588,44 @@ void WinCOFFObjectWriter::WriteAuxiliarySymbols( i != e; ++i) { switch (i->AuxType) { case ATFunctionDefinition: - WriteLE32(i->Aux.FunctionDefinition.TagIndex); - WriteLE32(i->Aux.FunctionDefinition.TotalSize); - WriteLE32(i->Aux.FunctionDefinition.PointerToLinenumber); - WriteLE32(i->Aux.FunctionDefinition.PointerToNextFunction); + writeLE32(i->Aux.FunctionDefinition.TagIndex); + writeLE32(i->Aux.FunctionDefinition.TotalSize); + writeLE32(i->Aux.FunctionDefinition.PointerToLinenumber); + writeLE32(i->Aux.FunctionDefinition.PointerToNextFunction); WriteZeros(sizeof(i->Aux.FunctionDefinition.unused)); if (UseBigObj) WriteZeros(COFF::Symbol32Size - COFF::Symbol16Size); break; case 
ATbfAndefSymbol: WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused1)); - WriteLE16(i->Aux.bfAndefSymbol.Linenumber); + writeLE16(i->Aux.bfAndefSymbol.Linenumber); WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused2)); - WriteLE32(i->Aux.bfAndefSymbol.PointerToNextFunction); + writeLE32(i->Aux.bfAndefSymbol.PointerToNextFunction); WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused3)); if (UseBigObj) WriteZeros(COFF::Symbol32Size - COFF::Symbol16Size); break; case ATWeakExternal: - WriteLE32(i->Aux.WeakExternal.TagIndex); - WriteLE32(i->Aux.WeakExternal.Characteristics); + writeLE32(i->Aux.WeakExternal.TagIndex); + writeLE32(i->Aux.WeakExternal.Characteristics); WriteZeros(sizeof(i->Aux.WeakExternal.unused)); if (UseBigObj) WriteZeros(COFF::Symbol32Size - COFF::Symbol16Size); break; case ATFile: - WriteBytes( + writeBytes( StringRef(reinterpret_cast<const char *>(&i->Aux), UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size)); break; case ATSectionDefinition: - WriteLE32(i->Aux.SectionDefinition.Length); - WriteLE16(i->Aux.SectionDefinition.NumberOfRelocations); - WriteLE16(i->Aux.SectionDefinition.NumberOfLinenumbers); - WriteLE32(i->Aux.SectionDefinition.CheckSum); - WriteLE16(static_cast<int16_t>(i->Aux.SectionDefinition.Number)); - Write8(i->Aux.SectionDefinition.Selection); + writeLE32(i->Aux.SectionDefinition.Length); + writeLE16(i->Aux.SectionDefinition.NumberOfRelocations); + writeLE16(i->Aux.SectionDefinition.NumberOfLinenumbers); + writeLE32(i->Aux.SectionDefinition.CheckSum); + writeLE16(static_cast<int16_t>(i->Aux.SectionDefinition.Number)); + write8(i->Aux.SectionDefinition.Selection); WriteZeros(sizeof(i->Aux.SectionDefinition.unused)); - WriteLE16(static_cast<int16_t>(i->Aux.SectionDefinition.Number >> 16)); + writeLE16(static_cast<int16_t>(i->Aux.SectionDefinition.Number >> 16)); if (UseBigObj) WriteZeros(COFF::Symbol32Size - COFF::Symbol16Size); break; @@ -623,30 +633,30 @@ void WinCOFFObjectWriter::WriteAuxiliarySymbols( } } -void WinCOFFObjectWriter::WriteSectionHeader(const COFF::section &S) { - WriteBytes(StringRef(S.Name, COFF::NameSize)); - - WriteLE32(S.VirtualSize); - WriteLE32(S.VirtualAddress); - WriteLE32(S.SizeOfRawData); - WriteLE32(S.PointerToRawData); - WriteLE32(S.PointerToRelocations); - WriteLE32(S.PointerToLineNumbers); - WriteLE16(S.NumberOfRelocations); - WriteLE16(S.NumberOfLineNumbers); - WriteLE32(S.Characteristics); +void WinCOFFObjectWriter::writeSectionHeader(const COFF::section &S) { + writeBytes(StringRef(S.Name, COFF::NameSize)); + + writeLE32(S.VirtualSize); + writeLE32(S.VirtualAddress); + writeLE32(S.SizeOfRawData); + writeLE32(S.PointerToRawData); + writeLE32(S.PointerToRelocations); + writeLE32(S.PointerToLineNumbers); + writeLE16(S.NumberOfRelocations); + writeLE16(S.NumberOfLineNumbers); + writeLE32(S.Characteristics); } void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) { - WriteLE32(R.VirtualAddress); - WriteLE32(R.SymbolTableIndex); - WriteLE16(R.Type); + writeLE32(R.VirtualAddress); + writeLE32(R.SymbolTableIndex); + writeLE16(R.Type); } //////////////////////////////////////////////////////////////////////////////// // MCObjectWriter interface implementations -void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, +void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { // "Define" each section & symbol. This creates section & symbol // entries in the staging area. 
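The next hunk keeps the /INCREMENTAL guarantee by refusing to fold symbol differences over functions; it now reads the packed type directly from MCSymbolCOFF instead of masking MCSymbolData flags. COFF stores the complex type in bits 4-7 of the 16-bit type field, so the check reduces to a shift and a compare, e.g.:

#include <cassert>
#include <cstdint>

// COFF::SCT_COMPLEX_TYPE_SHIFT == 4 and IMAGE_SYM_DTYPE_FUNCTION == 2,
// so any function symbol carries 0x20 in its type field.
constexpr unsigned ComplexTypeShift = 4;
constexpr uint16_t DtypeFunction = 2;

bool isFunctionSymbol(uint16_t Type) {
  return (Type >> ComplexTypeShift) == DtypeFunction;
}

int main() {
  assert(isFunctionSymbol(0x20));  // DT_FUNCTION, base type NULL
  assert(!isFunctionSymbol(0x00)); // plain data symbol
}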
@@ -658,23 +668,21 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, DefineSymbol(Symbol, Asm, Layout); } -bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( +bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, bool InSet, bool IsPCRel) const { // MS LINK expects to be able to replace all references to a function with a // thunk to implement their /INCREMENTAL feature. Make sure we don't optimize // away any relocations to functions. - if ((((SymA.getData().getFlags() & COFF::SF_TypeMask) >> - COFF::SF_TypeShift) >> - COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION) + uint16_t Type = cast<MCSymbolCOFF>(SymA).getType(); + if ((Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION) return false; - return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB, + return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB, InSet, IsPCRel); } bool WinCOFFObjectWriter::isWeak(const MCSymbol &Sym) const { - const MCSymbolData &SD = Sym.getData(); - if (!SD.isExternal()) + if (!Sym.isExternal()) return false; if (!Sym.isInSection()) @@ -690,27 +698,25 @@ bool WinCOFFObjectWriter::isWeak(const MCSymbol &Sym) const { return true; } -void WinCOFFObjectWriter::RecordRelocation( +void WinCOFFObjectWriter::recordRelocation( MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) { assert(Target.getSymA() && "Relocation must reference a symbol!"); const MCSymbol &Symbol = Target.getSymA()->getSymbol(); const MCSymbol &A = Symbol; - if (!Asm.hasSymbolData(A)) + if (!A.isRegistered()) Asm.getContext().reportFatalError(Fixup.getLoc(), Twine("symbol '") + A.getName() + "' can not be undefined"); - const MCSymbolData &A_SD = Asm.getSymbolData(A); - MCSection *Section = Fragment->getParent(); // Mark this symbol as requiring an entry in the symbol table. assert(SectionMap.find(Section) != SectionMap.end() && - "Section must already have been defined in ExecutePostLayoutBinding!"); + "Section must already have been defined in executePostLayoutBinding!"); assert(SymbolMap.find(&A) != SymbolMap.end() && - "Symbol must already have been defined in ExecutePostLayoutBinding!"); + "Symbol must already have been defined in executePostLayoutBinding!"); COFFSection *coff_section = SectionMap[Section]; COFFSymbol *coff_symbol = SymbolMap[&A]; @@ -719,14 +725,13 @@ void WinCOFFObjectWriter::RecordRelocation( if (SymB) { const MCSymbol *B = &SymB->getSymbol(); - const MCSymbolData &B_SD = Asm.getSymbolData(*B); - if (!B_SD.getFragment()) + if (!B->getFragment()) Asm.getContext().reportFatalError( Fixup.getLoc(), Twine("symbol '") + B->getName() + "' can not be undefined in a subtraction expression"); - if (!A_SD.getFragment()) + if (!A.getFragment()) Asm.getContext().reportFatalError( Fixup.getLoc(), Twine("symbol '") + Symbol.getName() + @@ -763,9 +768,8 @@ void WinCOFFObjectWriter::RecordRelocation( // Turn relocations for temporary symbols into section relocations. 
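When the target is assembler-local (a temporary), the relocation is redirected at the section symbol and the target's position inside the section is folded into the addend, which is what the simplified expression below computes. A worked example of that arithmetic:

#include <cstdint>
#include <iostream>

// New addend = old addend + offset of the fragment in its section
//            + offset of the symbol inside that fragment.
uint64_t sectionRelativeAddend(uint64_t FixedValue, uint64_t FragmentOffset,
                               uint64_t SymbolOffset) {
  return FixedValue + FragmentOffset + SymbolOffset;
}

int main() {
  // A fixup addend of 8 against a temporary at fragment offset 0x40 + 4:
  std::cout << std::hex << sectionRelativeAddend(8, 0x40, 4) << "\n"; // 4c
}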
if (coff_symbol->MC->isTemporary() || CrossSection) { Reloc.Symb = coff_symbol->Section->Symbol; - FixedValue += - Layout.getFragmentOffset(coff_symbol->MC->getData().getFragment()) + - coff_symbol->MC->getData().getOffset(); + FixedValue += Layout.getFragmentOffset(coff_symbol->MC->getFragment()) + + coff_symbol->MC->getOffset(); } else Reloc.Symb = coff_symbol; @@ -825,7 +829,7 @@ void WinCOFFObjectWriter::RecordRelocation( coff_section->Relocations.push_back(Reloc); } -void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, +void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { size_t SectionsSize = Sections.size(); if (SectionsSize > static_cast<size_t>(INT32_MAX)) @@ -837,13 +841,9 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, UseBigObj = NumberOfSections > COFF::MaxNumberOfSections16; - DenseMap<COFFSection *, int32_t> SectionIndices( - NextPowerOf2(NumberOfSections)); - // Assign section numbers. size_t Number = 1; for (const auto &Section : Sections) { - SectionIndices[Section.get()] = Number; Section->Number = Number; Section->Symbol->Data.SectionNumber = Number; Section->Symbol->Aux[0].Aux.SectionDefinition.Number = Number; @@ -853,11 +853,10 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, Header.NumberOfSections = NumberOfSections; Header.NumberOfSymbols = 0; - for (auto FI = Asm.file_names_begin(), FE = Asm.file_names_end(); FI != FE; - ++FI) { + for (const std::string &Name : Asm.getFileNames()) { // round up to calculate the number of auxiliary symbols required unsigned SymbolSize = UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size; - unsigned Count = (FI->size() + SymbolSize - 1) / SymbolSize; + unsigned Count = (Name.size() + SymbolSize - 1) / SymbolSize; COFFSymbol *file = createSymbol(".file"); file->Data.SectionNumber = COFF::IMAGE_SYM_DEBUG; @@ -865,15 +864,15 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, file->Aux.resize(Count); unsigned Offset = 0; - unsigned Length = FI->size(); + unsigned Length = Name.size(); for (auto &Aux : file->Aux) { Aux.AuxType = ATFile; if (Length > SymbolSize) { - memcpy(&Aux.Aux, FI->c_str() + Offset, SymbolSize); + memcpy(&Aux.Aux, Name.c_str() + Offset, SymbolSize); Length = Length - SymbolSize; } else { - memcpy(&Aux.Aux, FI->c_str() + Offset, Length); + memcpy(&Aux.Aux, Name.c_str() + Offset, Length); memset((char *)&Aux.Aux + Length, 0, SymbolSize - Length); break; } @@ -887,12 +886,13 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, if (Symbol->Section) Symbol->Data.SectionNumber = Symbol->Section->Number; if (Symbol->should_keep()) { - Symbol->Index = Header.NumberOfSymbols++; + Symbol->setIndex(Header.NumberOfSymbols++); // Update auxiliary symbol info. Symbol->Data.NumberOfAuxSymbols = Symbol->Aux.size(); Header.NumberOfSymbols += Symbol->Data.NumberOfAuxSymbols; - } else - Symbol->Index = -1; + } else { + Symbol->setIndex(-1); + } } // Build string table. @@ -914,11 +914,11 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, // Fixup weak external references. 
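A weak external's single auxiliary record names its fallback definition by symbol-table index, and indices only exist once should_keep filtering has numbered the survivors, hence the late backpatch in the loop below. A simplified sketch of the record being patched (the real AuxiliaryWeakExternal also carries padding bytes):

#include <cstdint>

struct WeakExternalAux {
  uint32_t TagIndex;        // symbol-table index of the fallback symbol
  uint32_t Characteristics; // e.g. IMAGE_WEAK_EXTERN_SEARCH_ALIAS
};

void backpatchTag(WeakExternalAux &Aux, int64_t FallbackIndex) {
  Aux.TagIndex = static_cast<uint32_t>(FallbackIndex);
}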
for (auto &Symbol : Symbols) { if (Symbol->Other) { - assert(Symbol->Index != -1); + assert(Symbol->getIndex() != -1); assert(Symbol->Aux.size() == 1 && "Symbol must contain one aux symbol!"); assert(Symbol->Aux[0].AuxType == ATWeakExternal && "Symbol's aux symbol must be a Weak External!"); - Symbol->Aux[0].Aux.WeakExternal.TagIndex = Symbol->Other->Index; + Symbol->Aux[0].Aux.WeakExternal.TagIndex = Symbol->Other->getIndex(); } } @@ -944,8 +944,7 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, if (Assoc->Number == -1) continue; - Section->Symbol->Aux[0].Aux.SectionDefinition.Number = - SectionIndices[Assoc]; + Section->Symbol->Aux[0].Aux.SectionDefinition.Number = Assoc->Number; } // Assign file offsets to COFF object file structures. @@ -994,8 +993,8 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, offset += COFF::RelocationSize * Sec->Relocations.size(); for (auto &Relocation : Sec->Relocations) { - assert(Relocation.Symb->Index != -1); - Relocation.Data.SymbolTableIndex = Relocation.Symb->Index; + assert(Relocation.Symb->getIndex() != -1); + Relocation.Data.SymbolTableIndex = Relocation.Symb->getIndex(); } } @@ -1021,13 +1020,13 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, { sections::iterator i, ie; - MCAssembler::const_iterator j, je; + MCAssembler::iterator j, je; for (auto &Section : Sections) { if (Section->Number != -1) { if (Section->Relocations.size() >= 0xffff) Section->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL; - WriteSectionHeader(Section->Header); + writeSectionHeader(Section->Header); } } @@ -1077,7 +1076,7 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, "Header::PointerToSymbolTable is insane!"); for (auto &Symbol : Symbols) - if (Symbol->Index != -1) + if (Symbol->getIndex() != -1) WriteSymbol(*Symbol); OS.write(Strings.data().data(), Strings.data().size()); diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index d2fbd37499f3..41fc8e4681ef 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -23,7 +22,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolCOFF.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFStreamer.h" #include "llvm/Support/COFF.h" @@ -97,17 +96,17 @@ bool MCWinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, Symbol->getSection().getVariant() == MCSection::SV_COFF) && "Got non-COFF section in the COFF backend!"); - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + getAssembler().registerSymbol(*Symbol); switch (Attribute) { default: return false; case MCSA_WeakReference: case MCSA_Weak: - SD.modifyFlags(COFF::SF_WeakExternal, COFF::SF_WeakExternal); - SD.setExternal(true); + cast<MCSymbolCOFF>(Symbol)->setIsWeakExternal(); + Symbol->setExternal(true); break; case MCSA_Global: - SD.setExternal(true); + Symbol->setExternal(true); break; } @@ -134,11 +133,11 @@ void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { FatalError("storage class specified outside of symbol definition"); if (StorageClass & ~COFF::SSC_Invalid) - FatalError(Twine("storage class value '") + itostr(StorageClass) + + FatalError("storage class value '" + Twine(StorageClass) + "' out of range"); - 
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*CurSymbol); - SD.modifyFlags(StorageClass << COFF::SF_ClassShift, COFF::SF_ClassMask); + getAssembler().registerSymbol(*CurSymbol); + cast<MCSymbolCOFF>(CurSymbol)->setClass((uint16_t)StorageClass); } void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) { @@ -146,10 +145,10 @@ void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) { FatalError("symbol type specified outside of a symbol definition"); if (Type & ~0xffff) - FatalError(Twine("type value '") + itostr(Type) + "' out of range"); + FatalError("type value '" + Twine(Type) + "' out of range"); - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*CurSymbol); - SD.modifyFlags(Type << COFF::SF_TypeShift, COFF::SF_TypeMask); + getAssembler().registerSymbol(*CurSymbol); + cast<MCSymbolCOFF>(CurSymbol)->setType((uint16_t)Type); } void MCWinCOFFStreamer::EndCOFFSymbolDef() { @@ -158,9 +157,30 @@ void MCWinCOFFStreamer::EndCOFFSymbolDef() { CurSymbol = nullptr; } +void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { + // SafeSEH is a feature specific to 32-bit x86. It does not exist (and is + // unnecessary) on all platforms which use table-based exception dispatch. + if (getContext().getObjectFileInfo()->getTargetTriple().getArch() != + Triple::x86) + return; + + if (cast<MCSymbolCOFF>(Symbol)->isSafeSEH()) + return; + + MCSection *SXData = getContext().getObjectFileInfo()->getSXDataSection(); + getAssembler().registerSection(*SXData); + if (SXData->getAlignment() < 4) + SXData->setAlignment(4); + + new MCSafeSEHFragment(Symbol, SXData); + + getAssembler().registerSymbol(*Symbol); + cast<MCSymbolCOFF>(Symbol)->setIsSafeSEH(); +} + void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { MCDataFragment *DF = getOrCreateDataFragment(); - const MCSymbolRefExpr *SRE = MCSymbolRefExpr::Create(Symbol, getContext()); + const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext()); MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, FK_SecRel_2); DF->getFixups().push_back(Fixup); DF->getContents().resize(DF->getContents().size() + 2, 0); @@ -168,16 +188,12 @@ void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { void MCWinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) { MCDataFragment *DF = getOrCreateDataFragment(); - const MCSymbolRefExpr *SRE = MCSymbolRefExpr::Create(Symbol, getContext()); + const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext()); MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, FK_SecRel_4); DF->getFixups().push_back(Fixup); DF->getContents().resize(DF->getContents().size() + 4, 0); } -void MCWinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { - llvm_unreachable("not supported"); -} - void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { assert((!Symbol->isInSection() || @@ -195,9 +211,9 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, AssignSection(Symbol, nullptr); - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - SD.setExternal(true); - SD.setCommon(Size, ByteAlignment); + getAssembler().registerSymbol(*Symbol); + Symbol->setExternal(true); + Symbol->setCommon(Size, ByteAlignment); if (!T.isKnownWindowsMSVCEnvironment() && ByteAlignment > 1) { SmallString<128> Directive; @@ -224,8 +240,8 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, if (Section->getAlignment() < ByteAlignment) 
Section->setAlignment(ByteAlignment); - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - SD.setExternal(false); + getAssembler().registerSymbol(*Symbol); + Symbol->setExternal(false); AssignSection(Symbol, Section); @@ -235,7 +251,7 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, MCFillFragment *Fragment = new MCFillFragment( /*Value=*/0, /*ValueSize=*/0, Size, Section); - SD.setFragment(Fragment); + Symbol->setFragment(Fragment); } void MCWinCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index fb91eed29097..54ed954a90d9 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -224,7 +224,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) child_iterator e = child_end(); if (i == e) { - ec = object_error::success; + ec = std::error_code(); return; } @@ -254,7 +254,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) SymbolTable = i; ++i; FirstRegular = i; - ec = object_error::success; + ec = std::error_code(); return; } @@ -298,14 +298,14 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) StringTable = i; ++i; FirstRegular = i; - ec = object_error::success; + ec = std::error_code(); return; } if (Name[0] != '/') { Format = has64SymTable ? K_MIPS64 : K_GNU; FirstRegular = i; - ec = object_error::success; + ec = std::error_code(); return; } @@ -320,7 +320,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) ++i; if (i == e) { FirstRegular = i; - ec = object_error::success; + ec = std::error_code(); return; } @@ -332,7 +332,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) } FirstRegular = i; - ec = object_error::success; + ec = std::error_code(); } Archive::child_iterator Archive::child_begin(bool SkipInternal) const { diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp new file mode 100644 index 000000000000..90a736f3baf4 --- /dev/null +++ b/lib/Object/ArchiveWriter.cpp @@ -0,0 +1,342 @@ +//===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the writeArchive function. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/raw_ostream.h" + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#else +#include <io.h> +#endif + +using namespace llvm; + +NewArchiveIterator::NewArchiveIterator() {} + +NewArchiveIterator::NewArchiveIterator(object::Archive::child_iterator I, + StringRef Name) + : IsNewMember(false), Name(Name), OldI(I) {} + +NewArchiveIterator::NewArchiveIterator(StringRef NewFilename, StringRef Name) + : IsNewMember(true), Name(Name), NewFilename(NewFilename) {} + +StringRef NewArchiveIterator::getName() const { return Name; } + +bool NewArchiveIterator::isNewMember() const { return IsNewMember; } + +object::Archive::child_iterator NewArchiveIterator::getOld() const { + assert(!IsNewMember); + return OldI; +} + +StringRef NewArchiveIterator::getNew() const { + assert(IsNewMember); + return NewFilename; +} + +llvm::ErrorOr<int> +NewArchiveIterator::getFD(sys::fs::file_status &NewStatus) const { + assert(IsNewMember); + int NewFD; + if (auto EC = sys::fs::openFileForRead(NewFilename, NewFD)) + return EC; + assert(NewFD != -1); + + if (auto EC = sys::fs::status(NewFD, NewStatus)) + return EC; + + // Opening a directory doesn't make sense. Let it fail. + // Linux cannot open directories with open(2), although + // cygwin and *bsd can. + if (NewStatus.type() == sys::fs::file_type::directory_file) + return make_error_code(std::errc::is_a_directory); + + return NewFD; +} + +template <typename T> +static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size, + bool MayTruncate = false) { + uint64_t OldPos = OS.tell(); + OS << Data; + unsigned SizeSoFar = OS.tell() - OldPos; + if (Size > SizeSoFar) { + unsigned Remaining = Size - SizeSoFar; + for (unsigned I = 0; I < Remaining; ++I) + OS << ' '; + } else if (Size < SizeSoFar) { + assert(MayTruncate && "Data doesn't fit in Size"); + // Some of the data this is used for (like UID) can be larger than the + // space available in the archive format. Truncate in that case. + OS.seek(OldPos + Size); + } +} + +static void print32BE(raw_fd_ostream &Out, unsigned Val) { + // FIXME: Should use Endian.h here. 
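What this FIXME asks for would look roughly like the following, using the byte-order templates that llvm/Support/Endian.h provides; this is a sketch, not part of the patch, and assumes the unaligned-write template is available at this revision:

#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

static void print32BE(llvm::raw_fd_ostream &Out, uint32_t Val) {
  char Buf[4];
  // Store Val big-endian regardless of host byte order.
  llvm::support::endian::write<uint32_t, llvm::support::big,
                               llvm::support::unaligned>(Buf, Val);
  Out.write(Buf, sizeof(Buf));
}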
+ for (int I = 3; I >= 0; --I) { + char V = (Val >> (8 * I)) & 0xff; + Out << V; + } +} + +static void printRestOfMemberHeader(raw_fd_ostream &Out, + const sys::TimeValue &ModTime, unsigned UID, + unsigned GID, unsigned Perms, + unsigned Size) { + printWithSpacePadding(Out, ModTime.toEpochTime(), 12); + printWithSpacePadding(Out, UID, 6, true); + printWithSpacePadding(Out, GID, 6, true); + printWithSpacePadding(Out, format("%o", Perms), 8); + printWithSpacePadding(Out, Size, 10); + Out << "`\n"; +} + +static void printMemberHeader(raw_fd_ostream &Out, StringRef Name, + const sys::TimeValue &ModTime, unsigned UID, + unsigned GID, unsigned Perms, unsigned Size) { + printWithSpacePadding(Out, Twine(Name) + "/", 16); + printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); +} + +static void printMemberHeader(raw_fd_ostream &Out, unsigned NameOffset, + const sys::TimeValue &ModTime, unsigned UID, + unsigned GID, unsigned Perms, unsigned Size) { + Out << '/'; + printWithSpacePadding(Out, NameOffset, 15); + printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); +} + +static void writeStringTable(raw_fd_ostream &Out, + ArrayRef<NewArchiveIterator> Members, + std::vector<unsigned> &StringMapIndexes) { + unsigned StartOffset = 0; + for (ArrayRef<NewArchiveIterator>::iterator I = Members.begin(), + E = Members.end(); + I != E; ++I) { + StringRef Name = I->getName(); + if (Name.size() < 16) + continue; + if (StartOffset == 0) { + printWithSpacePadding(Out, "//", 58); + Out << "`\n"; + StartOffset = Out.tell(); + } + StringMapIndexes.push_back(Out.tell() - StartOffset); + Out << Name << "/\n"; + } + if (StartOffset == 0) + return; + if (Out.tell() % 2) + Out << '\n'; + int Pos = Out.tell(); + Out.seek(StartOffset - 12); + printWithSpacePadding(Out, Pos - StartOffset, 10); + Out.seek(Pos); +} + +// Returns the offset of the first reference to a member offset. +static ErrorOr<unsigned> +writeSymbolTable(raw_fd_ostream &Out, ArrayRef<NewArchiveIterator> Members, + ArrayRef<MemoryBufferRef> Buffers, + std::vector<unsigned> &MemberOffsetRefs) { + unsigned StartOffset = 0; + unsigned MemberNum = 0; + std::string NameBuf; + raw_string_ostream NameOS(NameBuf); + unsigned NumSyms = 0; + LLVMContext Context; + for (ArrayRef<NewArchiveIterator>::iterator I = Members.begin(), + E = Members.end(); + I != E; ++I, ++MemberNum) { + MemoryBufferRef MemberBuffer = Buffers[MemberNum]; + ErrorOr<std::unique_ptr<object::SymbolicFile>> ObjOrErr = + object::SymbolicFile::createSymbolicFile( + MemberBuffer, sys::fs::file_magic::unknown, &Context); + if (!ObjOrErr) + continue; // FIXME: check only for "not an object file" errors. 
+ object::SymbolicFile &Obj = *ObjOrErr.get(); + + if (!StartOffset) { + printMemberHeader(Out, "", sys::TimeValue::now(), 0, 0, 0, 0); + StartOffset = Out.tell(); + print32BE(Out, 0); + } + + for (const object::BasicSymbolRef &S : Obj.symbols()) { + uint32_t Symflags = S.getFlags(); + if (Symflags & object::SymbolRef::SF_FormatSpecific) + continue; + if (!(Symflags & object::SymbolRef::SF_Global)) + continue; + if (Symflags & object::SymbolRef::SF_Undefined) + continue; + if (auto EC = S.printName(NameOS)) + return EC; + NameOS << '\0'; + ++NumSyms; + MemberOffsetRefs.push_back(MemberNum); + print32BE(Out, 0); + } + } + Out << NameOS.str(); + + if (StartOffset == 0) + return 0; + + if (Out.tell() % 2) + Out << '\0'; + + unsigned Pos = Out.tell(); + Out.seek(StartOffset - 12); + printWithSpacePadding(Out, Pos - StartOffset, 10); + Out.seek(StartOffset); + print32BE(Out, NumSyms); + Out.seek(Pos); + return StartOffset + 4; +} + +std::pair<StringRef, std::error_code> +llvm::writeArchive(StringRef ArcName, + std::vector<NewArchiveIterator> &NewMembers, + bool WriteSymtab) { + SmallString<128> TmpArchive; + int TmpArchiveFD; + if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a", + TmpArchiveFD, TmpArchive)) + return std::make_pair(ArcName, EC); + + tool_output_file Output(TmpArchive, TmpArchiveFD); + raw_fd_ostream &Out = Output.os(); + Out << "!<arch>\n"; + + std::vector<unsigned> MemberOffsetRefs; + + std::vector<std::unique_ptr<MemoryBuffer>> Buffers; + std::vector<MemoryBufferRef> Members; + std::vector<sys::fs::file_status> NewMemberStatus; + + for (unsigned I = 0, N = NewMembers.size(); I < N; ++I) { + NewArchiveIterator &Member = NewMembers[I]; + MemoryBufferRef MemberRef; + + if (Member.isNewMember()) { + StringRef Filename = Member.getNew(); + NewMemberStatus.resize(NewMemberStatus.size() + 1); + sys::fs::file_status &Status = NewMemberStatus.back(); + ErrorOr<int> FD = Member.getFD(Status); + if (auto EC = FD.getError()) + return std::make_pair(Filename, EC); + ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr = + MemoryBuffer::getOpenFile(FD.get(), Filename, Status.getSize(), + false); + if (auto EC = MemberBufferOrErr.getError()) + return std::make_pair(Filename, EC); + if (close(FD.get()) != 0) + return std::make_pair(Filename, + std::error_code(errno, std::generic_category())); + Buffers.push_back(std::move(MemberBufferOrErr.get())); + MemberRef = Buffers.back()->getMemBufferRef(); + } else { + object::Archive::child_iterator OldMember = Member.getOld(); + ErrorOr<MemoryBufferRef> MemberBufferOrErr = + OldMember->getMemoryBufferRef(); + if (auto EC = MemberBufferOrErr.getError()) + return std::make_pair("", EC); + MemberRef = MemberBufferOrErr.get(); + } + Members.push_back(MemberRef); + } + + unsigned MemberReferenceOffset = 0; + if (WriteSymtab) { + ErrorOr<unsigned> MemberReferenceOffsetOrErr = + writeSymbolTable(Out, NewMembers, Members, MemberOffsetRefs); + if (auto EC = MemberReferenceOffsetOrErr.getError()) + return std::make_pair(ArcName, EC); + MemberReferenceOffset = MemberReferenceOffsetOrErr.get(); + } + + std::vector<unsigned> StringMapIndexes; + writeStringTable(Out, NewMembers, StringMapIndexes); + + unsigned MemberNum = 0; + unsigned LongNameMemberNum = 0; + unsigned NewMemberNum = 0; + std::vector<unsigned> MemberOffset; + for (std::vector<NewArchiveIterator>::iterator I = NewMembers.begin(), + E = NewMembers.end(); + I != E; ++I, ++MemberNum) { + + unsigned Pos = Out.tell(); + MemberOffset.push_back(Pos); + + MemoryBufferRef File = 
Members[MemberNum]; + if (I->isNewMember()) { + StringRef FileName = I->getNew(); + const sys::fs::file_status &Status = NewMemberStatus[NewMemberNum]; + NewMemberNum++; + + StringRef Name = sys::path::filename(FileName); + if (Name.size() < 16) + printMemberHeader(Out, Name, Status.getLastModificationTime(), + Status.getUser(), Status.getGroup(), + Status.permissions(), Status.getSize()); + else + printMemberHeader(Out, StringMapIndexes[LongNameMemberNum++], + Status.getLastModificationTime(), Status.getUser(), + Status.getGroup(), Status.permissions(), + Status.getSize()); + } else { + object::Archive::child_iterator OldMember = I->getOld(); + StringRef Name = I->getName(); + + if (Name.size() < 16) + printMemberHeader(Out, Name, OldMember->getLastModified(), + OldMember->getUID(), OldMember->getGID(), + OldMember->getAccessMode(), OldMember->getSize()); + else + printMemberHeader(Out, StringMapIndexes[LongNameMemberNum++], + OldMember->getLastModified(), OldMember->getUID(), + OldMember->getGID(), OldMember->getAccessMode(), + OldMember->getSize()); + } + + Out << File.getBuffer(); + + if (Out.tell() % 2) + Out << '\n'; + } + + if (MemberReferenceOffset) { + Out.seek(MemberReferenceOffset); + for (unsigned MemberNum : MemberOffsetRefs) + print32BE(Out, MemberOffset[MemberNum]); + } + + Output.keep(); + Out.close(); + sys::fs::rename(TmpArchive, ArcName); + return std::make_pair("", std::error_code()); +} diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt index 37add22fac58..17aac8b41211 100644 --- a/lib/Object/CMakeLists.txt +++ b/lib/Object/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMObject Archive.cpp + ArchiveWriter.cpp Binary.cpp COFFObjectFile.cpp COFFYAML.cpp diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 74709c88ba26..1055b987d7ef 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -46,7 +46,7 @@ static std::error_code checkOffset(MemoryBufferRef M, uintptr_t Addr, Addr < uintptr_t(M.getBufferStart())) { return object_error::unexpected_eof; } - return object_error::success; + return std::error_code(); } // Sets Obj unless any bytes in [addr, addr + size) fall outsize of m. @@ -59,7 +59,7 @@ static std::error_code getObject(const T *&Obj, MemoryBufferRef M, if (std::error_code EC = checkOffset(M, Addr, Size)) return EC; Obj = reinterpret_cast<const T *>(Addr); - return object_error::success; + return std::error_code(); } // Decode a string table entry in base 64 (//AAAAAA). 
Expects \arg Str without @@ -156,11 +156,11 @@ std::error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref, if (Symb.isAnyUndefined()) { Result = UnknownAddressOrSize; - return object_error::success; + return std::error_code(); } if (Symb.isCommon()) { Result = UnknownAddressOrSize; - return object_error::success; + return std::error_code(); } int32_t SectionNumber = Symb.getSectionNumber(); if (!COFF::isReservedSectionNumber(SectionNumber)) { @@ -169,11 +169,11 @@ std::error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref, return EC; Result = Section->VirtualAddress + Symb.getValue(); - return object_error::success; + return std::error_code(); } Result = Symb.getValue(); - return object_error::success; + return std::error_code(); } std::error_code COFFObjectFile::getSymbolType(DataRefImpl Ref, @@ -205,7 +205,7 @@ std::error_code COFFObjectFile::getSymbolType(DataRefImpl Ref, COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)) Result = SymbolRef::ST_Data; } - return object_error::success; + return std::error_code(); } uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const { @@ -236,16 +236,12 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const { return Result; } -std::error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref, - uint64_t &Result) const { +uint64_t COFFObjectFile::getSymbolSize(DataRefImpl Ref) const { COFFSymbolRef Symb = getCOFFSymbol(Ref); if (Symb.isCommon()) - Result = Symb.getValue(); - else - Result = UnknownAddressOrSize; - - return object_error::success; + return Symb.getValue(); + return UnknownAddressOrSize; } std::error_code @@ -262,7 +258,7 @@ COFFObjectFile::getSymbolSection(DataRefImpl Ref, Ref.p = reinterpret_cast<uintptr_t>(Sec); Result = section_iterator(SectionRef(Ref, this)); } - return object_error::success; + return std::error_code(); } void COFFObjectFile::moveSectionNext(DataRefImpl &Ref) const { @@ -421,7 +417,7 @@ std::error_code COFFObjectFile::initSymbolTablePtr() { // Check that the string table is null terminated if has any in it. if (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0) return object_error::parse_failed; - return object_error::success; + return std::error_code(); } // Returns the file offset for the given VA. @@ -442,7 +438,7 @@ std::error_code COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const { if (SectionStart <= Addr && Addr < SectionEnd) { uint32_t Offset = Addr - SectionStart; Res = uintptr_t(base()) + Section->PointerToRawData + Offset; - return object_error::success; + return std::error_code(); } } return object_error::parse_failed; @@ -458,7 +454,7 @@ std::error_code COFFObjectFile::getHintName(uint32_t Rva, uint16_t &Hint, const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(IntPtr); Hint = *reinterpret_cast<const ulittle16_t *>(Ptr); Name = StringRef(reinterpret_cast<const char *>(Ptr + 2)); - return object_error::success; + return std::error_code(); } // Find the import table. @@ -467,11 +463,11 @@ std::error_code COFFObjectFile::initImportTablePtr() { // the import table, do nothing. const data_directory *DataEntry; if (getDataDirectory(COFF::IMPORT_TABLE, DataEntry)) - return object_error::success; + return std::error_code(); // Do nothing if the pointer to import table is NULL. if (DataEntry->RelativeVirtualAddress == 0) - return object_error::success; + return std::error_code(); uint32_t ImportTableRva = DataEntry->RelativeVirtualAddress; // -1 because the last entry is the null entry. 
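[Editor's note] Almost every hunk in this file is the same mechanical substitution: the object_error::success enumerator is being retired (the Error.cpp hunk later in this diff removes its message case), and success is now spelled as a default-constructed std::error_code, the canonical "no error" value that converts to false. A minimal stand-alone sketch of the resulting idiom; checkIndex is an illustrative name, not a function from this diff:

#include <cstdint>
#include <system_error>

// Failure maps to a real error condition; success is the default-constructed
// std::error_code, whose zero value makes it test as false.
static std::error_code checkIndex(uint32_t Index, uint32_t Limit) {
  if (Index >= Limit)
    return std::make_error_code(std::errc::result_out_of_range);
  return std::error_code();
}

Callers keep LLVM's usual early-return shape:

  if (std::error_code EC = checkIndex(Idx, NumEntries))
    return EC;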
@@ -485,16 +481,16 @@ std::error_code COFFObjectFile::initImportTablePtr() { return EC; ImportDirectory = reinterpret_cast< const import_directory_table_entry *>(IntPtr); - return object_error::success; + return std::error_code(); } // Initializes DelayImportDirectory and NumberOfDelayImportDirectory. std::error_code COFFObjectFile::initDelayImportTablePtr() { const data_directory *DataEntry; if (getDataDirectory(COFF::DELAY_IMPORT_DESCRIPTOR, DataEntry)) - return object_error::success; + return std::error_code(); if (DataEntry->RelativeVirtualAddress == 0) - return object_error::success; + return std::error_code(); uint32_t RVA = DataEntry->RelativeVirtualAddress; NumberOfDelayImportDirectory = DataEntry->Size / @@ -505,7 +501,7 @@ std::error_code COFFObjectFile::initDelayImportTablePtr() { return EC; DelayImportDirectory = reinterpret_cast< const delay_import_directory_table_entry *>(IntPtr); - return object_error::success; + return std::error_code(); } // Find the export table. @@ -514,11 +510,11 @@ std::error_code COFFObjectFile::initExportTablePtr() { // the export table, do nothing. const data_directory *DataEntry; if (getDataDirectory(COFF::EXPORT_TABLE, DataEntry)) - return object_error::success; + return std::error_code(); // Do nothing if the pointer to export table is NULL. if (DataEntry->RelativeVirtualAddress == 0) - return object_error::success; + return std::error_code(); uint32_t ExportTableRva = DataEntry->RelativeVirtualAddress; uintptr_t IntPtr = 0; @@ -526,15 +522,15 @@ std::error_code COFFObjectFile::initExportTablePtr() { return EC; ExportDirectory = reinterpret_cast<const export_directory_table_entry *>(IntPtr); - return object_error::success; + return std::error_code(); } std::error_code COFFObjectFile::initBaseRelocPtr() { const data_directory *DataEntry; if (getDataDirectory(COFF::BASE_RELOCATION_TABLE, DataEntry)) - return object_error::success; + return std::error_code(); if (DataEntry->RelativeVirtualAddress == 0) - return object_error::success; + return std::error_code(); uintptr_t IntPtr = 0; if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) @@ -543,7 +539,7 @@ std::error_code COFFObjectFile::initBaseRelocPtr() { IntPtr); BaseRelocEnd = reinterpret_cast<coff_base_reloc_block_header *>( IntPtr + DataEntry->Size); - return object_error::success; + return std::error_code(); } COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) @@ -608,7 +604,7 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) if (COFFHeader) { // The prior checkSize call may have failed. This isn't a hard error // because we were just trying to sniff out bigobj. 
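// [Editor's note, not part of the upstream source] "bigobj" is the extended
// COFF object format (MSVC's /bigobj) whose larger header lifts the 16-bit
// section-count limit of classic COFF. The constructor probes for that
// variant speculatively, so a failed checkSize here only means "not a
// bigobj"; EC is cleared below and parsing continues as regular COFF.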
- EC = object_error::success; + EC = std::error_code(); CurPtr += sizeof(coff_file_header); if (COFFHeader->isImportLibrary()) @@ -670,7 +666,7 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) if ((EC = initBaseRelocPtr())) return; - EC = object_error::success; + EC = std::error_code(); } basic_symbol_iterator COFFObjectFile::symbol_begin_impl() const { @@ -796,13 +792,13 @@ iterator_range<base_reloc_iterator> COFFObjectFile::base_relocs() const { std::error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const { Res = PE32Header; - return object_error::success; + return std::error_code(); } std::error_code COFFObjectFile::getPE32PlusHeader(const pe32plus_header *&Res) const { Res = PE32PlusHeader; - return object_error::success; + return std::error_code(); } std::error_code @@ -821,18 +817,18 @@ COFFObjectFile::getDataDirectory(uint32_t Index, return object_error::parse_failed; } Res = &DataDirectory[Index]; - return object_error::success; + return std::error_code(); } std::error_code COFFObjectFile::getSection(int32_t Index, const coff_section *&Result) const { Result = nullptr; if (COFF::isReservedSectionNumber(Index)) - return object_error::success; + return std::error_code(); if (static_cast<uint32_t>(Index) <= getNumberOfSections()) { // We already verified the section table data, so no need to check again. Result = SectionTable + (Index - 1); - return object_error::success; + return std::error_code(); } return object_error::parse_failed; } @@ -845,7 +841,7 @@ std::error_code COFFObjectFile::getString(uint32_t Offset, if (Offset >= StringTableSize) return object_error::unexpected_eof; Result = StringRef(StringTable + Offset); - return object_error::success; + return std::error_code(); } std::error_code COFFObjectFile::getSymbolName(COFFSymbolRef Symbol, @@ -855,7 +851,7 @@ std::error_code COFFObjectFile::getSymbolName(COFFSymbolRef Symbol, uint32_t Offset = Symbol.getStringTableOffset().Offset; if (std::error_code EC = getString(Offset, Res)) return EC; - return object_error::success; + return std::error_code(); } if (Symbol.getShortName()[COFF::NameSize - 1] == 0) @@ -864,7 +860,7 @@ std::error_code COFFObjectFile::getSymbolName(COFFSymbolRef Symbol, else // Not null terminated, use all 8 bytes. 
Res = StringRef(Symbol.getShortName(), COFF::NameSize); - return object_error::success; + return std::error_code(); } ArrayRef<uint8_t> @@ -915,7 +911,7 @@ std::error_code COFFObjectFile::getSectionName(const coff_section *Sec, } Res = Name; - return object_error::success; + return std::error_code(); } uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const { @@ -953,7 +949,7 @@ COFFObjectFile::getSectionContents(const coff_section *Sec, if (checkOffset(Data, ConStart, SectionSize)) return object_error::parse_failed; Res = makeArrayRef(reinterpret_cast<const uint8_t *>(ConStart), SectionSize); - return object_error::success; + return std::error_code(); } const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const { @@ -978,7 +974,7 @@ std::error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel, getObject(VirtualAddressPtr, Data, &R->VirtualAddress)) return EC; Res = *VirtualAddressPtr; - return object_error::success; + return std::error_code(); } symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const { @@ -1012,7 +1008,7 @@ std::error_code COFFObjectFile::getRelocationType(DataRefImpl Rel, uint64_t &Res) const { const coff_relocation* R = toRel(Rel); Res = R->Type; - return object_error::success; + return std::error_code(); } const coff_section * @@ -1113,27 +1109,11 @@ COFFObjectFile::getRelocationTypeName(DataRefImpl Rel, Res = "Unknown"; } Result.append(Res.begin(), Res.end()); - return object_error::success; + return std::error_code(); } #undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME -std::error_code -COFFObjectFile::getRelocationValueString(DataRefImpl Rel, - SmallVectorImpl<char> &Result) const { - const coff_relocation *Reloc = toRel(Rel); - DataRefImpl Sym; - ErrorOr<COFFSymbolRef> Symb = getSymbol(Reloc->SymbolTableIndex); - if (std::error_code EC = Symb.getError()) - return EC; - Sym.p = reinterpret_cast<uintptr_t>(Symb->getRawPtr()); - StringRef SymName; - if (std::error_code EC = getSymbolName(Sym, SymName)) - return EC; - Result.append(SymName.begin(), SymName.end()); - return object_error::success; -} - bool COFFObjectFile::isRelocatableObject() const { return !DataDirectory; } @@ -1150,7 +1130,7 @@ void ImportDirectoryEntryRef::moveNext() { std::error_code ImportDirectoryEntryRef::getImportTableEntry( const import_directory_table_entry *&Result) const { Result = ImportTable + Index; - return object_error::success; + return std::error_code(); } static imported_symbol_iterator @@ -1212,19 +1192,19 @@ std::error_code ImportDirectoryEntryRef::getName(StringRef &Result) const { OwningObject->getRvaPtr(ImportTable[Index].NameRVA, IntPtr)) return EC; Result = StringRef(reinterpret_cast<const char *>(IntPtr)); - return object_error::success; + return std::error_code(); } std::error_code ImportDirectoryEntryRef::getImportLookupTableRVA(uint32_t &Result) const { Result = ImportTable[Index].ImportLookupTableRVA; - return object_error::success; + return std::error_code(); } std::error_code ImportDirectoryEntryRef::getImportAddressTableRVA(uint32_t &Result) const { Result = ImportTable[Index].ImportAddressTableRVA; - return object_error::success; + return std::error_code(); } std::error_code ImportDirectoryEntryRef::getImportLookupEntry( @@ -1234,7 +1214,7 @@ std::error_code ImportDirectoryEntryRef::getImportLookupEntry( if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr)) return EC; Result = reinterpret_cast<const import_lookup_table_entry32 *>(IntPtr); - return object_error::success; + return std::error_code(); } bool 
DelayImportDirectoryEntryRef:: @@ -1268,13 +1248,13 @@ std::error_code DelayImportDirectoryEntryRef::getName(StringRef &Result) const { if (std::error_code EC = OwningObject->getRvaPtr(Table[Index].Name, IntPtr)) return EC; Result = StringRef(reinterpret_cast<const char *>(IntPtr)); - return object_error::success; + return std::error_code(); } std::error_code DelayImportDirectoryEntryRef:: getDelayImportTable(const delay_import_directory_table_entry *&Result) const { Result = Table; - return object_error::success; + return std::error_code(); } std::error_code DelayImportDirectoryEntryRef:: @@ -1288,7 +1268,7 @@ getImportAddress(int AddrIndex, uint64_t &Result) const { Result = *reinterpret_cast<const ulittle64_t *>(IntPtr); else Result = *reinterpret_cast<const ulittle32_t *>(IntPtr); - return object_error::success; + return std::error_code(); } bool ExportDirectoryEntryRef:: @@ -1308,20 +1288,20 @@ std::error_code ExportDirectoryEntryRef::getDllName(StringRef &Result) const { OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr)) return EC; Result = StringRef(reinterpret_cast<const char *>(IntPtr)); - return object_error::success; + return std::error_code(); } // Returns the starting ordinal number. std::error_code ExportDirectoryEntryRef::getOrdinalBase(uint32_t &Result) const { Result = ExportTable->OrdinalBase; - return object_error::success; + return std::error_code(); } // Returns the export ordinal of the current export symbol. std::error_code ExportDirectoryEntryRef::getOrdinal(uint32_t &Result) const { Result = ExportTable->OrdinalBase + Index; - return object_error::success; + return std::error_code(); } // Returns the address of the current export symbol. @@ -1333,7 +1313,7 @@ std::error_code ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const { const export_address_table_entry *entry = reinterpret_cast<const export_address_table_entry *>(IntPtr); Result = entry[Index].ExportRVA; - return object_error::success; + return std::error_code(); } // Returns the name of the current export symbol. If the symbol is exported only @@ -1359,10 +1339,10 @@ ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const { if (std::error_code EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr)) return EC; Result = StringRef(reinterpret_cast<const char *>(IntPtr)); - return object_error::success; + return std::error_code(); } Result = ""; - return object_error::success; + return std::error_code(); } bool ImportedSymbolRef:: @@ -1381,11 +1361,11 @@ ImportedSymbolRef::getSymbolName(StringRef &Result) const { if (Entry32) { // If a symbol is imported only by ordinal, it has no name. if (Entry32[Index].isOrdinal()) - return object_error::success; + return std::error_code(); RVA = Entry32[Index].getHintNameRVA(); } else { if (Entry64[Index].isOrdinal()) - return object_error::success; + return std::error_code(); RVA = Entry64[Index].getHintNameRVA(); } uintptr_t IntPtr = 0; @@ -1393,7 +1373,7 @@ ImportedSymbolRef::getSymbolName(StringRef &Result) const { return EC; // +2 because the first two bytes is hint. 
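// [Editor's note, not part of the upstream source] A PE hint/name table entry
// is a 2-byte ordinal hint followed by a NUL-terminated symbol name, hence
// the IntPtr + 2 below to skip straight to the name.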
Result = StringRef(reinterpret_cast<const char *>(IntPtr + 2)); - return object_error::success; + return std::error_code(); } std::error_code ImportedSymbolRef::getOrdinal(uint16_t &Result) const { @@ -1401,13 +1381,13 @@ std::error_code ImportedSymbolRef::getOrdinal(uint16_t &Result) const { if (Entry32) { if (Entry32[Index].isOrdinal()) { Result = Entry32[Index].getOrdinal(); - return object_error::success; + return std::error_code(); } RVA = Entry32[Index].getHintNameRVA(); } else { if (Entry64[Index].isOrdinal()) { Result = Entry64[Index].getOrdinal(); - return object_error::success; + return std::error_code(); } RVA = Entry64[Index].getHintNameRVA(); } @@ -1415,7 +1395,7 @@ std::error_code ImportedSymbolRef::getOrdinal(uint16_t &Result) const { if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr)) return EC; Result = *reinterpret_cast<const ulittle16_t *>(IntPtr); - return object_error::success; + return std::error_code(); } ErrorOr<std::unique_ptr<COFFObjectFile>> @@ -1452,11 +1432,11 @@ void BaseRelocRef::moveNext() { std::error_code BaseRelocRef::getType(uint8_t &Type) const { auto *Entry = reinterpret_cast<const coff_base_reloc_block_entry *>(Header + 1); Type = Entry[Index].getType(); - return object_error::success; + return std::error_code(); } std::error_code BaseRelocRef::getRVA(uint32_t &Result) const { auto *Entry = reinterpret_cast<const coff_base_reloc_block_entry *>(Header + 1); Result = Header->PageRVA + Entry[Index].getOffset(); - return object_error::success; + return std::error_code(); } diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 8ccb2538ac78..c7df30a59035 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -27,51 +27,28 @@ ObjectFile::createELFObjectFile(MemoryBufferRef Obj) { std::size_t MaxAlignment = 1ULL << countTrailingZeros(uintptr_t(Obj.getBufferStart())); + if (MaxAlignment < 2) + return object_error::parse_failed; + std::error_code EC; std::unique_ptr<ObjectFile> R; - if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) -#if !LLVM_IS_UNALIGNED_ACCESS_FAST - if (MaxAlignment >= 4) - R.reset(new ELFObjectFile<ELFType<support::little, 4, false>>(Obj, EC)); - else -#endif - if (MaxAlignment >= 2) - R.reset(new ELFObjectFile<ELFType<support::little, 2, false>>(Obj, EC)); - else - return object_error::parse_failed; - else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) -#if !LLVM_IS_UNALIGNED_ACCESS_FAST - if (MaxAlignment >= 4) - R.reset(new ELFObjectFile<ELFType<support::big, 4, false>>(Obj, EC)); - else -#endif - if (MaxAlignment >= 2) - R.reset(new ELFObjectFile<ELFType<support::big, 2, false>>(Obj, EC)); + if (Ident.first == ELF::ELFCLASS32) { + if (Ident.second == ELF::ELFDATA2LSB) + R.reset(new ELFObjectFile<ELFType<support::little, false>>(Obj, EC)); + else if (Ident.second == ELF::ELFDATA2MSB) + R.reset(new ELFObjectFile<ELFType<support::big, false>>(Obj, EC)); else return object_error::parse_failed; - else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) -#if !LLVM_IS_UNALIGNED_ACCESS_FAST - if (MaxAlignment >= 8) - R.reset(new ELFObjectFile<ELFType<support::big, 8, true>>(Obj, EC)); - else -#endif - if (MaxAlignment >= 2) - R.reset(new ELFObjectFile<ELFType<support::big, 2, true>>(Obj, EC)); - else - return object_error::parse_failed; - else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) { -#if !LLVM_IS_UNALIGNED_ACCESS_FAST - if (MaxAlignment >= 8) - R.reset(new 
ELFObjectFile<ELFType<support::little, 8, true>>(Obj, EC)); - else -#endif - if (MaxAlignment >= 2) - R.reset(new ELFObjectFile<ELFType<support::little, 2, true>>(Obj, EC)); + } else if (Ident.first == ELF::ELFCLASS64) { + if (Ident.second == ELF::ELFDATA2LSB) + R.reset(new ELFObjectFile<ELFType<support::little, true>>(Obj, EC)); + else if (Ident.second == ELF::ELFDATA2MSB) + R.reset(new ELFObjectFile<ELFType<support::big, true>>(Obj, EC)); else return object_error::parse_failed; + } else { + return object_error::parse_failed; } - else - llvm_unreachable("Buffer is not an ELF object file!"); if (EC) return EC; diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp index d2daab72d589..644a178c1623 100644 --- a/lib/Object/Error.cpp +++ b/lib/Object/Error.cpp @@ -33,7 +33,6 @@ const char *_object_error_category::name() const LLVM_NOEXCEPT { std::string _object_error_category::message(int EV) const { object_error E = static_cast<object_error>(EV); switch (E) { - case object_error::success: return "Success"; case object_error::arch_not_found: return "No object file for requested architecture"; case object_error::invalid_file_type: @@ -44,6 +43,12 @@ std::string _object_error_category::message(int EV) const { return "The end of the file was unexpectedly encountered"; case object_error::bitcode_section_not_found: return "Bitcode section not found in object file"; + case object_error::macho_small_load_command: + return "Mach-O load command with size < 8 bytes"; + case object_error::macho_load_segment_too_many_sections: + return "Mach-O segment load command contains too many sections"; + case object_error::macho_load_segment_too_small: + return "Mach-O segment load command size is too small"; } llvm_unreachable("An enumerator of object_error does not have a message " "defined."); diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index c12c5d4392d5..e89cb8ead36d 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -195,7 +195,7 @@ std::error_code IRObjectFile::printSymbolName(raw_ostream &OS, unsigned Index = getAsmSymIndex(Symb); assert(Index <= AsmSymbols.size()); OS << AsmSymbols[Index].first; - return object_error::success;; + return std::error_code(); } if (Mang) @@ -203,7 +203,7 @@ std::error_code IRObjectFile::printSymbolName(raw_ostream &OS, else OS << GV->getName(); - return object_error::success; + return std::error_code(); } uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const { diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 79f810064988..d02ca48a7d19 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -38,6 +38,7 @@ namespace { }; } +// FIXME: Replace all uses of this function with getStructOrErr. 
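// [Editor's note, not part of the upstream source] The contrast between the
// two helpers: getStruct() below treats an out-of-bounds read as fatal, while
// the new getStructOrErr() returns ErrorOr<T> carrying
// object_error::parse_failed, so malformed Mach-O load commands can surface
// as recoverable errors instead of aborting.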
template <typename T> static T getStruct(const MachOObjectFile *O, const char *P) { // Don't read before the beginning or past the end of the file @@ -51,39 +52,19 @@ static T getStruct(const MachOObjectFile *O, const char *P) { return Cmd; } -template <typename SegmentCmd> -static uint32_t getSegmentLoadCommandNumSections(const SegmentCmd &S, - uint32_t Cmdsize) { - const unsigned SectionSize = sizeof(SegmentCmd); - if (S.nsects > std::numeric_limits<uint32_t>::max() / SectionSize || - S.nsects * SectionSize > Cmdsize - sizeof(S)) - report_fatal_error( - "Number of sections too large for size of load command."); - return S.nsects; -} - -static uint32_t -getSegmentLoadCommandNumSections(const MachOObjectFile *O, - const MachOObjectFile::LoadCommandInfo &L) { - if (O->is64Bit()) - return getSegmentLoadCommandNumSections(O->getSegment64LoadCommand(L), - L.C.cmdsize); - - return getSegmentLoadCommandNumSections(O->getSegmentLoadCommand(L), - L.C.cmdsize); -} +template <typename T> +static ErrorOr<T> getStructOrErr(const MachOObjectFile *O, const char *P) { + // Don't read before the beginning or past the end of the file + if (P < O->getData().begin() || P + sizeof(T) > O->getData().end()) + return object_error::parse_failed; -static bool isPageZeroSegment(const MachOObjectFile *O, - const MachOObjectFile::LoadCommandInfo &L) { - if (O->is64Bit()) { - MachO::segment_command_64 S = O->getSegment64LoadCommand(L); - return StringRef("__PAGEZERO").equals(S.segname); - } - MachO::segment_command S = O->getSegmentLoadCommand(L); - return StringRef("__PAGEZERO").equals(S.segname); + T Cmd; + memcpy(&Cmd, P, sizeof(T)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(Cmd); + return Cmd; } - static const char * getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L, unsigned Sec) { @@ -128,70 +109,6 @@ static unsigned getCPUType(const MachOObjectFile *O) { return O->getHeader().cputype; } -static void printRelocationTargetName(const MachOObjectFile *O, - const MachO::any_relocation_info &RE, - raw_string_ostream &fmt) { - bool IsScattered = O->isRelocationScattered(RE); - - // Target of a scattered relocation is an address. In the interest of - // generating pretty output, scan through the symbol table looking for a - // symbol that aligns with that address. If we find one, print it. - // Otherwise, we just print the hex address of the target. - if (IsScattered) { - uint32_t Val = O->getPlainRelocationSymbolNum(RE); - - for (const SymbolRef &Symbol : O->symbols()) { - std::error_code ec; - uint64_t Addr; - StringRef Name; - - if ((ec = Symbol.getAddress(Addr))) - report_fatal_error(ec.message()); - if (Addr != Val) - continue; - if ((ec = Symbol.getName(Name))) - report_fatal_error(ec.message()); - fmt << Name; - return; - } - - // If we couldn't find a symbol that this relocation refers to, try - // to find a section beginning instead. 
- for (const SectionRef &Section : O->sections()) { - std::error_code ec; - - StringRef Name; - uint64_t Addr = Section.getAddress(); - if (Addr != Val) - continue; - if ((ec = Section.getName(Name))) - report_fatal_error(ec.message()); - fmt << Name; - return; - } - - fmt << format("0x%x", Val); - return; - } - - StringRef S; - bool isExtern = O->getPlainRelocationExternal(RE); - uint64_t Val = O->getPlainRelocationSymbolNum(RE); - - if (isExtern) { - symbol_iterator SI = O->symbol_begin(); - advance(SI, Val); - SI->getName(S); - } else { - section_iterator SI = O->section_begin(); - // Adjust for the fact that sections are 1-indexed. - advance(SI, Val - 1); - SI->getName(S); - } - - fmt << S; -} - static uint32_t getPlainRelocationAddress(const MachO::any_relocation_info &RE) { return RE.r_word0; @@ -244,6 +161,69 @@ static uint32_t getSectionFlags(const MachOObjectFile *O, return Sect.flags; } +static ErrorOr<MachOObjectFile::LoadCommandInfo> +getLoadCommandInfo(const MachOObjectFile *Obj, const char *Ptr) { + auto CmdOrErr = getStructOrErr<MachO::load_command>(Obj, Ptr); + if (!CmdOrErr) + return CmdOrErr.getError(); + if (CmdOrErr->cmdsize < 8) + return object_error::macho_small_load_command; + MachOObjectFile::LoadCommandInfo Load; + Load.Ptr = Ptr; + Load.C = CmdOrErr.get(); + return Load; +} + +static ErrorOr<MachOObjectFile::LoadCommandInfo> +getFirstLoadCommandInfo(const MachOObjectFile *Obj) { + unsigned HeaderSize = Obj->is64Bit() ? sizeof(MachO::mach_header_64) + : sizeof(MachO::mach_header); + return getLoadCommandInfo(Obj, getPtr(Obj, HeaderSize)); +} + +static ErrorOr<MachOObjectFile::LoadCommandInfo> +getNextLoadCommandInfo(const MachOObjectFile *Obj, + const MachOObjectFile::LoadCommandInfo &L) { + return getLoadCommandInfo(Obj, L.Ptr + L.C.cmdsize); +} + +template <typename T> +static void parseHeader(const MachOObjectFile *Obj, T &Header, + std::error_code &EC) { + auto HeaderOrErr = getStructOrErr<T>(Obj, getPtr(Obj, 0)); + if (HeaderOrErr) + Header = HeaderOrErr.get(); + else + EC = HeaderOrErr.getError(); +} + +// Parses LC_SEGMENT or LC_SEGMENT_64 load command, adds addresses of all +// sections to \param Sections, and optionally sets +// \param IsPageZeroSegment to true. +template <typename SegmentCmd> +static std::error_code parseSegmentLoadCommand( + const MachOObjectFile *Obj, const MachOObjectFile::LoadCommandInfo &Load, + SmallVectorImpl<const char *> &Sections, bool &IsPageZeroSegment) { + const unsigned SegmentLoadSize = sizeof(SegmentCmd); + if (Load.C.cmdsize < SegmentLoadSize) + return object_error::macho_load_segment_too_small; + auto SegOrErr = getStructOrErr<SegmentCmd>(Obj, Load.Ptr); + if (!SegOrErr) + return SegOrErr.getError(); + SegmentCmd S = SegOrErr.get(); + const unsigned SectionSize = + Obj->is64Bit() ? 
sizeof(MachO::section_64) : sizeof(MachO::section); + if (S.nsects > std::numeric_limits<uint32_t>::max() / SectionSize || + S.nsects * SectionSize > Load.C.cmdsize - SegmentLoadSize) + return object_error::macho_load_segment_too_many_sections; + for (unsigned J = 0; J < S.nsects; ++J) { + const char *Sec = getSectionPtr(Obj, Load, J); + Sections.push_back(Sec); + } + IsPageZeroSegment |= StringRef("__PAGEZERO").equals(S.segname); + return std::error_code(); +} + MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64bits, std::error_code &EC) : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object), @@ -251,15 +231,25 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, DataInCodeLoadCmd(nullptr), LinkOptHintsLoadCmd(nullptr), DyldInfoLoadCmd(nullptr), UuidLoadCmd(nullptr), HasPageZeroSegment(false) { - uint32_t LoadCommandCount = this->getHeader().ncmds; - if (LoadCommandCount == 0) + if (is64Bit()) + parseHeader(this, Header64, EC); + else + parseHeader(this, Header, EC); + if (EC) return; - MachO::LoadCommandType SegmentLoadType = is64Bit() ? - MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT; + uint32_t LoadCommandCount = getHeader().ncmds; + if (LoadCommandCount == 0) + return; - MachOObjectFile::LoadCommandInfo Load = getFirstLoadCommandInfo(); - for (unsigned I = 0; ; ++I) { + auto LoadOrErr = getFirstLoadCommandInfo(this); + if (!LoadOrErr) { + EC = LoadOrErr.getError(); + return; + } + LoadCommandInfo Load = LoadOrErr.get(); + for (unsigned I = 0; I < LoadCommandCount; ++I) { + LoadCommands.push_back(Load); if (Load.C.cmd == MachO::LC_SYMTAB) { // Multiple symbol tables if (SymtabLoadCmd) { @@ -303,20 +293,14 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, return; } UuidLoadCmd = Load.Ptr; - } else if (Load.C.cmd == SegmentLoadType) { - const unsigned SegmentLoadSize = this->is64Bit() - ? 
sizeof(MachO::segment_command_64) - : sizeof(MachO::segment_command); - if (Load.C.cmdsize < SegmentLoadSize) - report_fatal_error("Segment load command size is too small."); - - uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load); - for (unsigned J = 0; J < NumSections; ++J) { - const char *Sec = getSectionPtr(this, Load, J); - Sections.push_back(Sec); - } - if (isPageZeroSegment(this, Load)) - HasPageZeroSegment = true; + } else if (Load.C.cmd == MachO::LC_SEGMENT_64) { + if ((EC = parseSegmentLoadCommand<MachO::segment_command_64>( + this, Load, Sections, HasPageZeroSegment))) + return; + } else if (Load.C.cmd == MachO::LC_SEGMENT) { + if ((EC = parseSegmentLoadCommand<MachO::segment_command>( + this, Load, Sections, HasPageZeroSegment))) + return; } else if (Load.C.cmd == MachO::LC_LOAD_DYLIB || Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB || Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB || @@ -324,12 +308,16 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) { Libraries.push_back(Load.Ptr); } - - if (I == LoadCommandCount - 1) - break; - else - Load = getNextLoadCommandInfo(Load); + if (I < LoadCommandCount - 1) { + auto LoadOrErr = getNextLoadCommandInfo(this, Load); + if (!LoadOrErr) { + EC = LoadOrErr.getError(); + return; + } + Load = LoadOrErr.get(); + } } + assert(LoadCommands.size() == LoadCommandCount); } void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const { @@ -348,7 +336,7 @@ std::error_code MachOObjectFile::getSymbolName(DataRefImpl Symb, report_fatal_error( "Symbol name entry points before beginning or past end of file."); Res = StringRef(Start); - return object_error::success; + return std::error_code(); } unsigned MachOObjectFile::getSectionType(SectionRef Sec) const { @@ -378,7 +366,7 @@ std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb, return object_error::parse_failed; const char *Start = &StringTable.data()[NValue]; Res = StringRef(Start); - return object_error::success; + return std::error_code(); } std::error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb, @@ -398,31 +386,25 @@ std::error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb, else Res = Entry.n_value; } - return object_error::success; + return std::error_code(); } -std::error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI, - uint32_t &Result) const { +uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const { uint32_t flags = getSymbolFlags(DRI); if (flags & SymbolRef::SF_Common) { MachO::nlist_base Entry = getSymbolTableEntryBase(this, DRI); - Result = 1 << MachO::GET_COMM_ALIGN(Entry.n_desc); - } else { - Result = 0; + return 1 << MachO::GET_COMM_ALIGN(Entry.n_desc); } - return object_error::success; + return 0; } -std::error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, - uint64_t &Result) const { +uint64_t MachOObjectFile::getSymbolSize(DataRefImpl DRI) const { uint64_t Value; getSymbolAddress(DRI, Value); uint32_t flags = getSymbolFlags(DRI); if (flags & SymbolRef::SF_Common) - Result = Value; - else - Result = UnknownAddressOrSize; - return object_error::success; + return Value; + return UnknownAddressOrSize; } std::error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, @@ -435,7 +417,7 @@ std::error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, // If this is a STAB debugging symbol, we can do nothing more. 
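// [Editor's note, not part of the upstream source] When any N_STAB bit is
// set, the whole n_type byte is a debugger stab code rather than an N_TYPE
// value, so the only safe classification is SymbolRef::ST_Debug.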
if (n_type & MachO::N_STAB) { Res = SymbolRef::ST_Debug; - return object_error::success; + return std::error_code(); } switch (n_type & MachO::N_TYPE) { @@ -446,7 +428,7 @@ std::error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, Res = SymbolRef::ST_Function; break; } - return object_error::success; + return std::error_code(); } uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { @@ -506,7 +488,7 @@ std::error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb, Res = section_iterator(SectionRef(DRI, this)); } - return object_error::success; + return std::error_code(); } void MachOObjectFile::moveSectionNext(DataRefImpl &Sec) const { @@ -517,7 +499,7 @@ std::error_code MachOObjectFile::getSectionName(DataRefImpl Sec, StringRef &Result) const { ArrayRef<char> Raw = getSectionRawName(Sec); Result = parseSegmentOrSectionName(Raw.data()); - return object_error::success; + return std::error_code(); } uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const { @@ -548,7 +530,7 @@ std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec, } Res = this->getData().substr(Offset, Size); - return object_error::success; + return std::error_code(); } uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const { @@ -643,7 +625,7 @@ std::error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel, Sec.d.a = Rel.d.a; uint64_t SecAddress = getSectionAddress(Sec); Res = SecAddress + Offset; - return object_error::success; + return std::error_code(); } std::error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel, @@ -652,7 +634,7 @@ std::error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel, "Only implemented for MH_OBJECT"); MachO::any_relocation_info RE = getRelocation(Rel); Res = getAnyRelocationAddress(RE); - return object_error::success; + return std::error_code(); } symbol_iterator @@ -685,7 +667,7 @@ std::error_code MachOObjectFile::getRelocationType(DataRefImpl Rel, uint64_t &Res) const { MachO::any_relocation_info RE = getRelocation(Rel); Res = getAnyRelocationType(RE); - return object_error::success; + return std::error_code(); } std::error_code @@ -797,183 +779,7 @@ MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, break; } Result.append(res.begin(), res.end()); - return object_error::success; -} - -std::error_code -MachOObjectFile::getRelocationValueString(DataRefImpl Rel, - SmallVectorImpl<char> &Result) const { - MachO::any_relocation_info RE = getRelocation(Rel); - - unsigned Arch = this->getArch(); - - std::string fmtbuf; - raw_string_ostream fmt(fmtbuf); - unsigned Type = this->getAnyRelocationType(RE); - bool IsPCRel = this->getAnyRelocationPCRel(RE); - - // Determine any addends that should be displayed with the relocation. - // These require decoding the relocation type, which is triple-specific. - - // X86_64 has entirely custom relocation types. - if (Arch == Triple::x86_64) { - bool isPCRel = getAnyRelocationPCRel(RE); - - switch (Type) { - case MachO::X86_64_RELOC_GOT_LOAD: - case MachO::X86_64_RELOC_GOT: { - printRelocationTargetName(this, RE, fmt); - fmt << "@GOT"; - if (isPCRel) fmt << "PCREL"; - break; - } - case MachO::X86_64_RELOC_SUBTRACTOR: { - DataRefImpl RelNext = Rel; - moveRelocationNext(RelNext); - MachO::any_relocation_info RENext = getRelocation(RelNext); - - // X86_64_RELOC_SUBTRACTOR must be followed by a relocation of type - // X86_64_RELOC_UNSIGNED. - // NOTE: Scattered relocations don't exist on x86_64. 
- unsigned RType = getAnyRelocationType(RENext); - if (RType != MachO::X86_64_RELOC_UNSIGNED) - report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " - "X86_64_RELOC_SUBTRACTOR."); - - // The X86_64_RELOC_UNSIGNED contains the minuend symbol; - // X86_64_RELOC_SUBTRACTOR contains the subtrahend. - printRelocationTargetName(this, RENext, fmt); - fmt << "-"; - printRelocationTargetName(this, RE, fmt); - break; - } - case MachO::X86_64_RELOC_TLV: - printRelocationTargetName(this, RE, fmt); - fmt << "@TLV"; - if (isPCRel) fmt << "P"; - break; - case MachO::X86_64_RELOC_SIGNED_1: - printRelocationTargetName(this, RE, fmt); - fmt << "-1"; - break; - case MachO::X86_64_RELOC_SIGNED_2: - printRelocationTargetName(this, RE, fmt); - fmt << "-2"; - break; - case MachO::X86_64_RELOC_SIGNED_4: - printRelocationTargetName(this, RE, fmt); - fmt << "-4"; - break; - default: - printRelocationTargetName(this, RE, fmt); - break; - } - // X86 and ARM share some relocation types in common. - } else if (Arch == Triple::x86 || Arch == Triple::arm || - Arch == Triple::ppc) { - // Generic relocation types... - switch (Type) { - case MachO::GENERIC_RELOC_PAIR: // prints no info - return object_error::success; - case MachO::GENERIC_RELOC_SECTDIFF: { - DataRefImpl RelNext = Rel; - moveRelocationNext(RelNext); - MachO::any_relocation_info RENext = getRelocation(RelNext); - - // X86 sect diff's must be followed by a relocation of type - // GENERIC_RELOC_PAIR. - unsigned RType = getAnyRelocationType(RENext); - - if (RType != MachO::GENERIC_RELOC_PAIR) - report_fatal_error("Expected GENERIC_RELOC_PAIR after " - "GENERIC_RELOC_SECTDIFF."); - - printRelocationTargetName(this, RE, fmt); - fmt << "-"; - printRelocationTargetName(this, RENext, fmt); - break; - } - } - - if (Arch == Triple::x86 || Arch == Triple::ppc) { - switch (Type) { - case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: { - DataRefImpl RelNext = Rel; - moveRelocationNext(RelNext); - MachO::any_relocation_info RENext = getRelocation(RelNext); - - // X86 sect diff's must be followed by a relocation of type - // GENERIC_RELOC_PAIR. - unsigned RType = getAnyRelocationType(RENext); - if (RType != MachO::GENERIC_RELOC_PAIR) - report_fatal_error("Expected GENERIC_RELOC_PAIR after " - "GENERIC_RELOC_LOCAL_SECTDIFF."); - - printRelocationTargetName(this, RE, fmt); - fmt << "-"; - printRelocationTargetName(this, RENext, fmt); - break; - } - case MachO::GENERIC_RELOC_TLV: { - printRelocationTargetName(this, RE, fmt); - fmt << "@TLV"; - if (IsPCRel) fmt << "P"; - break; - } - default: - printRelocationTargetName(this, RE, fmt); - } - } else { // ARM-specific relocations - switch (Type) { - case MachO::ARM_RELOC_HALF: - case MachO::ARM_RELOC_HALF_SECTDIFF: { - // Half relocations steal a bit from the length field to encode - // whether this is an upper16 or a lower16 relocation. - bool isUpper = getAnyRelocationLength(RE) >> 1; - - if (isUpper) - fmt << ":upper16:("; - else - fmt << ":lower16:("; - printRelocationTargetName(this, RE, fmt); - - DataRefImpl RelNext = Rel; - moveRelocationNext(RelNext); - MachO::any_relocation_info RENext = getRelocation(RelNext); - - // ARM half relocs must be followed by a relocation of type - // ARM_RELOC_PAIR. 
- unsigned RType = getAnyRelocationType(RENext); - if (RType != MachO::ARM_RELOC_PAIR) - report_fatal_error("Expected ARM_RELOC_PAIR after " - "ARM_RELOC_HALF"); - - // NOTE: The half of the target virtual address is stashed in the - // address field of the secondary relocation, but we can't reverse - // engineer the constant offset from it without decoding the movw/movt - // instruction to find the other half in its immediate field. - - // ARM_RELOC_HALF_SECTDIFF encodes the second section in the - // symbol/section pointer of the follow-on relocation. - if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) { - fmt << "-"; - printRelocationTargetName(this, RENext, fmt); - } - - fmt << ")"; - break; - } - default: { - printRelocationTargetName(this, RE, fmt); - } - } - } - } else - printRelocationTargetName(this, RE, fmt); - - fmt.flush(); - Result.append(fmtbuf.begin(), fmtbuf.end()); - return object_error::success; + return std::error_code(); } std::error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel, @@ -1001,7 +807,12 @@ std::error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel, } } - return object_error::success; + return std::error_code(); +} + +uint8_t MachOObjectFile::getRelocationLength(DataRefImpl Rel) const { + MachO::any_relocation_info RE = getRelocation(Rel); + return getAnyRelocationLength(RE); } // @@ -1179,7 +990,7 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, } Res = LibrariesShortNames[Index]; - return object_error::success; + return std::error_code(); } basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const { @@ -1437,21 +1248,8 @@ unsigned MachOObjectFile::getArch() const { Triple MachOObjectFile::getArch(const char **McpuDefault, Triple *ThumbTriple) const { - Triple T; - if (is64Bit()) { - MachO::mach_header_64 H_64; - H_64 = getHeader64(); - T = MachOObjectFile::getArch(H_64.cputype, H_64.cpusubtype, McpuDefault); - *ThumbTriple = MachOObjectFile::getThumbArch(H_64.cputype, H_64.cpusubtype, - McpuDefault); - } else { - MachO::mach_header H; - H = getHeader(); - T = MachOObjectFile::getArch(H.cputype, H.cpusubtype, McpuDefault); - *ThumbTriple = MachOObjectFile::getThumbArch(H.cputype, H.cpusubtype, - McpuDefault); - } - return T; + *ThumbTriple = getThumbArch(Header.cputype, Header.cpusubtype, McpuDefault); + return getArch(Header.cputype, Header.cpusubtype, McpuDefault); } relocation_iterator MachOObjectFile::section_rel_begin(unsigned Index) const { @@ -2104,6 +1902,22 @@ iterator_range<bind_iterator> MachOObjectFile::weakBindTable() const { MachOBindEntry::Kind::Weak); } +MachOObjectFile::load_command_iterator +MachOObjectFile::begin_load_commands() const { + return LoadCommands.begin(); +} + +MachOObjectFile::load_command_iterator +MachOObjectFile::end_load_commands() const { + return LoadCommands.end(); +} + +iterator_range<MachOObjectFile::load_command_iterator> +MachOObjectFile::load_commands() const { + return iterator_range<load_command_iterator>(begin_load_commands(), + end_load_commands()); +} + StringRef MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const { ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec); @@ -2203,29 +2017,6 @@ MachOObjectFile::getAnyRelocationSection( return SectionRef(DRI, this); } -MachOObjectFile::LoadCommandInfo -MachOObjectFile::getFirstLoadCommandInfo() const { - MachOObjectFile::LoadCommandInfo Load; - - unsigned HeaderSize = is64Bit() ? 
sizeof(MachO::mach_header_64) : - sizeof(MachO::mach_header); - Load.Ptr = getPtr(this, HeaderSize); - Load.C = getStruct<MachO::load_command>(this, Load.Ptr); - if (Load.C.cmdsize < 8) - report_fatal_error("Load command with size < 8 bytes."); - return Load; -} - -MachOObjectFile::LoadCommandInfo -MachOObjectFile::getNextLoadCommandInfo(const LoadCommandInfo &L) const { - MachOObjectFile::LoadCommandInfo Next; - Next.Ptr = L.Ptr + L.C.cmdsize; - Next.C = getStruct<MachO::load_command>(this, Next.Ptr); - if (Next.C.cmdsize < 8) - report_fatal_error("Load command with size < 8 bytes."); - return Next; -} - MachO::section MachOObjectFile::getSection(DataRefImpl DRI) const { assert(DRI.d.a < Sections.size() && "Should have detected this earlier"); return getStruct<MachO::section>(this, Sections[DRI.d.a]); @@ -2390,12 +2181,13 @@ MachOObjectFile::getDice(DataRefImpl Rel) const { return getStruct<MachO::data_in_code_entry>(this, P); } -MachO::mach_header MachOObjectFile::getHeader() const { - return getStruct<MachO::mach_header>(this, getPtr(this, 0)); +const MachO::mach_header &MachOObjectFile::getHeader() const { + return Header; } -MachO::mach_header_64 MachOObjectFile::getHeader64() const { - return getStruct<MachO::mach_header_64>(this, getPtr(this, 0)); +const MachO::mach_header_64 &MachOObjectFile::getHeader64() const { + assert(is64Bit()); + return Header64; } uint32_t MachOObjectFile::getIndirectSymbolTableEntry( diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp index a01c83873e09..2705e7dc40e5 100644 --- a/lib/Object/MachOUniversal.cpp +++ b/lib/Object/MachOUniversal.cpp @@ -120,7 +120,7 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, ec = object_error::parse_failed; return; } - ec = object_error::success; + ec = std::error_code(); } static bool getCTMForArch(Triple::ArchType Arch, MachO::CPUType &CTM) { diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 84a5df089cb4..85f243675efc 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -187,10 +187,7 @@ uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) { } uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) { - uint64_t ret; - if (std::error_code ec = (*unwrap(SI))->getSize(ret)) - report_fatal_error(ec.message()); - return ret; + return (*unwrap(SI))->getSize(); } // RelocationRef accessors @@ -233,12 +230,6 @@ const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) { // NOTE: Caller takes ownership of returned string. 
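// [Editor's note, not part of the upstream source] With
// getRelocationValueString() dropped from the C++ API in this import, the C
// binding below degrades to strdup(""): callers still receive a heap string
// they own and must free(), it is just always empty now.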
const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI) { - SmallVector<char, 0> ret; - if (std::error_code ec = (*unwrap(RI))->getValueString(ret)) - report_fatal_error(ec.message()); - - char *str = static_cast<char*>(malloc(ret.size())); - std::copy(ret.begin(), ret.end(), str); - return str; + return strdup(""); } diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index 01b76543fa2e..f6667d9ea6aa 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -34,14 +34,10 @@ std::error_code ObjectFile::printSymbolName(raw_ostream &OS, if (std::error_code EC = getSymbolName(Symb, Name)) return EC; OS << Name; - return object_error::success; + return std::error_code(); } -std::error_code ObjectFile::getSymbolAlignment(DataRefImpl DRI, - uint32_t &Result) const { - Result = 0; - return object_error::success; -} +uint32_t ObjectFile::getSymbolAlignment(DataRefImpl DRI) const { return 0; } section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const { return section_iterator(SectionRef(Sec, this)); diff --git a/lib/ProfileData/CoverageMappingReader.cpp b/lib/ProfileData/CoverageMappingReader.cpp index cf6cd58f9533..ec531c3753ec 100644 --- a/lib/ProfileData/CoverageMappingReader.cpp +++ b/lib/ProfileData/CoverageMappingReader.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -358,8 +359,12 @@ std::error_code readCoverageMappingData( const char *CovBuf = Buf; Buf += CoverageSize; const char *CovEnd = Buf; + if (Buf > End) return coveragemap_error::malformed; + // Each coverage map has an alignment of 8, so we need to adjust alignment + // before reading the next map. + Buf += alignmentAdjustment(Buf, 8); while (FunBuf < FunEnd) { // Read the function information diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index eb99242914c2..47751fce3fcd 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -49,9 +49,9 @@ static ManagedStatic<std::vector<std::string>> CurrentDebugType; bool isCurrentDebugType(const char *DebugType) { if (CurrentDebugType->empty()) return true; - // see if DebugType is in list. Note: do not use find() as that forces us to + // See if DebugType is in list. Note: do not use find() as that forces us to // unnecessarily create an std::string instance. 
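// [Editor's note, not part of the upstream source] The one-character change
// below, "auto d" to "auto &d", follows the same reasoning as this comment:
// iterating by value copied every std::string in the list on each query,
// while binding by reference keeps the comparison allocation-free.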
- for (auto d : *CurrentDebugType) { + for (auto &d : *CurrentDebugType) { if (d == DebugType) return true; } diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index d2b551e8a0a6..9a7aeb50a216 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -178,3 +178,12 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { LLVMBool LLVMLoadLibraryPermanently(const char* Filename) { return llvm::sys::DynamicLibrary::LoadLibraryPermanently(Filename); } + +void *LLVMSearchForAddressOfSymbol(const char *symbolName) { + return llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(symbolName); +} + +void LLVMAddSymbol(const char *symbolName, void *symbolValue) { + return llvm::sys::DynamicLibrary::AddSymbol(symbolName, symbolValue); +} + diff --git a/lib/Support/SmallVector.cpp b/lib/Support/SmallVector.cpp index f9c0e78270c9..b931505bd6a1 100644 --- a/lib/Support/SmallVector.cpp +++ b/lib/Support/SmallVector.cpp @@ -33,6 +33,7 @@ void SmallVectorBase::grow_pod(void *FirstEl, size_t MinSizeInBytes, // If this wasn't grown from the inline copy, grow the allocated space. NewElts = realloc(this->BeginX, NewCapacityInBytes); } + assert(NewElts && "Out of memory"); this->EndX = (char*)NewElts+CurSizeBytes; this->BeginX = NewElts; diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp index a3998d2d13ea..757483b95864 100644 --- a/lib/Support/TargetParser.cpp +++ b/lib/Support/TargetParser.cpp @@ -22,72 +22,88 @@ using namespace llvm; namespace { -// List of canonical FPU names (use getFPUSynonym) +// List of canonical FPU names (use getFPUSynonym) and which architectural +// features they correspond to (use getFPUFeatures). // FIXME: TableGen this. struct { const char * Name; ARM::FPUKind ID; + unsigned FPUVersion; ///< Corresponds directly to the FP arch version number. 
+ ARM::NeonSupportLevel NeonSupport; + ARM::FPURestriction Restriction; } FPUNames[] = { - { "invalid", ARM::FK_INVALID }, - { "vfp", ARM::FK_VFP }, - { "vfpv2", ARM::FK_VFPV2 }, - { "vfpv3", ARM::FK_VFPV3 }, - { "vfpv3-d16", ARM::FK_VFPV3_D16 }, - { "vfpv4", ARM::FK_VFPV4 }, - { "vfpv4-d16", ARM::FK_VFPV4_D16 }, - { "fpv5-d16", ARM::FK_FPV5_D16 }, - { "fp-armv8", ARM::FK_FP_ARMV8 }, - { "neon", ARM::FK_NEON }, - { "neon-vfpv4", ARM::FK_NEON_VFPV4 }, - { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8 }, - { "crypto-neon-fp-armv8", ARM::FK_CRYPTO_NEON_FP_ARMV8 }, - { "softvfp", ARM::FK_SOFTVFP } + { "invalid", ARM::FK_INVALID, 0, ARM::NS_None, ARM::FR_None}, + { "none", ARM::FK_NONE, 0, ARM::NS_None, ARM::FR_None}, + { "vfp", ARM::FK_VFP, 2, ARM::NS_None, ARM::FR_None}, + { "vfpv2", ARM::FK_VFPV2, 2, ARM::NS_None, ARM::FR_None}, + { "vfpv3", ARM::FK_VFPV3, 3, ARM::NS_None, ARM::FR_None}, + { "vfpv3-d16", ARM::FK_VFPV3_D16, 3, ARM::NS_None, ARM::FR_D16}, + { "vfpv4", ARM::FK_VFPV4, 4, ARM::NS_None, ARM::FR_None}, + { "vfpv4-d16", ARM::FK_VFPV4_D16, 4, ARM::NS_None, ARM::FR_D16}, + { "fpv4-sp-d16", ARM::FK_FPV4_SP_D16, 4, ARM::NS_None, ARM::FR_SP_D16}, + { "fpv5-d16", ARM::FK_FPV5_D16, 5, ARM::NS_None, ARM::FR_D16}, + { "fpv5-sp-d16", ARM::FK_FPV5_SP_D16, 5, ARM::NS_None, ARM::FR_SP_D16}, + { "fp-armv8", ARM::FK_FP_ARMV8, 5, ARM::NS_None, ARM::FR_None}, + { "neon", ARM::FK_NEON, 3, ARM::NS_Neon, ARM::FR_None}, + { "neon-vfpv4", ARM::FK_NEON_VFPV4, 4, ARM::NS_Neon, ARM::FR_None}, + { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, 5, ARM::NS_Neon, ARM::FR_None}, + { "crypto-neon-fp-armv8", + ARM::FK_CRYPTO_NEON_FP_ARMV8, 5, ARM::NS_Crypto, ARM::FR_None}, + { "softvfp", ARM::FK_SOFTVFP, 0, ARM::NS_None, ARM::FR_None}, }; -// List of canonical arch names (use getArchSynonym) + +// List of canonical arch names (use getArchSynonym). +// This table also provides the build attribute fields for CPU arch +// and Arch ID, according to the Addenda to the ARM ABI, chapters +// 2.4 and 2.3.5.2 respectively. +// FIXME: SubArch values were simplified to fit into the expectations +// of the triples and are not conforming with their official names. +// Check to see if the expectation should be changed. // FIXME: TableGen this. struct { const char *Name; ARM::ArchKind ID; - const char *DefaultCPU; - ARMBuildAttrs::CPUArch DefaultArch; + const char *CPUAttr; // CPU class in build attributes. + const char *SubArch; // Sub-Arch name. + ARMBuildAttrs::CPUArch ArchAttr; // Arch ID in build attributes. 
} ARCHNames[] = { - { "invalid", ARM::AK_INVALID, nullptr, ARMBuildAttrs::CPUArch::Pre_v4 }, - { "armv2", ARM::AK_ARMV2, "2", ARMBuildAttrs::CPUArch::v4 }, - { "armv2a", ARM::AK_ARMV2A, "2A", ARMBuildAttrs::CPUArch::v4 }, - { "armv3", ARM::AK_ARMV3, "3", ARMBuildAttrs::CPUArch::v4 }, - { "armv3m", ARM::AK_ARMV3M, "3M", ARMBuildAttrs::CPUArch::v4 }, - { "armv4", ARM::AK_ARMV4, "4", ARMBuildAttrs::CPUArch::v4 }, - { "armv4t", ARM::AK_ARMV4T, "4T", ARMBuildAttrs::CPUArch::v4T }, - { "armv5", ARM::AK_ARMV5, "5", ARMBuildAttrs::CPUArch::v5T }, - { "armv5t", ARM::AK_ARMV5T, "5T", ARMBuildAttrs::CPUArch::v5T }, - { "armv5te", ARM::AK_ARMV5TE, "5TE", ARMBuildAttrs::CPUArch::v5TE }, - { "armv6", ARM::AK_ARMV6, "6", ARMBuildAttrs::CPUArch::v6 }, - { "armv6j", ARM::AK_ARMV6J, "6J", ARMBuildAttrs::CPUArch::v6 }, - { "armv6k", ARM::AK_ARMV6K, "6K", ARMBuildAttrs::CPUArch::v6K }, - { "armv6t2", ARM::AK_ARMV6T2, "6T2", ARMBuildAttrs::CPUArch::v6T2 }, - { "armv6z", ARM::AK_ARMV6Z, "6Z", ARMBuildAttrs::CPUArch::v6KZ }, - { "armv6zk", ARM::AK_ARMV6ZK, "6ZK", ARMBuildAttrs::CPUArch::v6KZ }, - { "armv6-m", ARM::AK_ARMV6M, "6-M", ARMBuildAttrs::CPUArch::v6_M }, - { "armv7", ARM::AK_ARMV7, "7", ARMBuildAttrs::CPUArch::v7 }, - { "armv7-a", ARM::AK_ARMV7A, "7-A", ARMBuildAttrs::CPUArch::v7 }, - { "armv7-r", ARM::AK_ARMV7R, "7-R", ARMBuildAttrs::CPUArch::v7 }, - { "armv7-m", ARM::AK_ARMV7M, "7-M", ARMBuildAttrs::CPUArch::v7 }, - { "armv8-a", ARM::AK_ARMV8A, "8-A", ARMBuildAttrs::CPUArch::v8 }, - { "armv8.1-a", ARM::AK_ARMV8_1A, "8.1-A", ARMBuildAttrs::CPUArch::v8 }, + { "invalid", ARM::AK_INVALID, nullptr, nullptr, ARMBuildAttrs::CPUArch::Pre_v4 }, + { "armv2", ARM::AK_ARMV2, "2", "v2", ARMBuildAttrs::CPUArch::Pre_v4 }, + { "armv2a", ARM::AK_ARMV2A, "2A", "v2a", ARMBuildAttrs::CPUArch::Pre_v4 }, + { "armv3", ARM::AK_ARMV3, "3", "v3", ARMBuildAttrs::CPUArch::Pre_v4 }, + { "armv3m", ARM::AK_ARMV3M, "3M", "v3m", ARMBuildAttrs::CPUArch::Pre_v4 }, + { "armv4", ARM::AK_ARMV4, "4", "v4", ARMBuildAttrs::CPUArch::v4 }, + { "armv4t", ARM::AK_ARMV4T, "4T", "v4t", ARMBuildAttrs::CPUArch::v4T }, + { "armv5t", ARM::AK_ARMV5T, "5T", "v5", ARMBuildAttrs::CPUArch::v5T }, + { "armv5te", ARM::AK_ARMV5TE, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE }, + { "armv5tej", ARM::AK_ARMV5TEJ, "5TEJ", "v5e", ARMBuildAttrs::CPUArch::v5TEJ }, + { "armv6", ARM::AK_ARMV6, "6", "v6", ARMBuildAttrs::CPUArch::v6 }, + { "armv6k", ARM::AK_ARMV6K, "6K", "v6k", ARMBuildAttrs::CPUArch::v6K }, + { "armv6t2", ARM::AK_ARMV6T2, "6T2", "v6t2", ARMBuildAttrs::CPUArch::v6T2 }, + { "armv6z", ARM::AK_ARMV6Z, "6Z", "v6z", ARMBuildAttrs::CPUArch::v6KZ }, + { "armv6zk", ARM::AK_ARMV6ZK, "6ZK", "v6zk", ARMBuildAttrs::CPUArch::v6KZ }, + { "armv6-m", ARM::AK_ARMV6M, "6-M", "v6m", ARMBuildAttrs::CPUArch::v6_M }, + { "armv6s-m", ARM::AK_ARMV6SM, "6S-M", "v6sm", ARMBuildAttrs::CPUArch::v6S_M }, + { "armv7-a", ARM::AK_ARMV7A, "7-A", "v7", ARMBuildAttrs::CPUArch::v7 }, + { "armv7-r", ARM::AK_ARMV7R, "7-R", "v7r", ARMBuildAttrs::CPUArch::v7 }, + { "armv7-m", ARM::AK_ARMV7M, "7-M", "v7m", ARMBuildAttrs::CPUArch::v7 }, + { "armv7e-m", ARM::AK_ARMV7EM, "7E-M", "v7em", ARMBuildAttrs::CPUArch::v7E_M }, + { "armv8-a", ARM::AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8 }, + { "armv8.1-a", ARM::AK_ARMV8_1A, "8.1-A", "v8.1a", ARMBuildAttrs::CPUArch::v8 }, // Non-standard Arch names. 
- { "iwmmxt", ARM::AK_IWMMXT, "iwmmxt", ARMBuildAttrs::CPUArch::v5TE }, - { "iwmmxt2", ARM::AK_IWMMXT2, "iwmmxt2", ARMBuildAttrs::CPUArch::v5TE }, - { "xscale", ARM::AK_XSCALE, "xscale", ARMBuildAttrs::CPUArch::v5TE }, - { "armv5e", ARM::AK_ARMV5E, "5E", ARMBuildAttrs::CPUArch::v5TE }, - { "armv5tej", ARM::AK_ARMV5TEJ, "5TE", ARMBuildAttrs::CPUArch::v5TE }, - { "armv6sm", ARM::AK_ARMV6SM, "6-M", ARMBuildAttrs::CPUArch::v6_M }, - { "armv6hl", ARM::AK_ARMV6HL, "6-M", ARMBuildAttrs::CPUArch::v6_M }, - { "armv7e-m", ARM::AK_ARMV7EM, "7E-M", ARMBuildAttrs::CPUArch::v7E_M }, - { "armv7l", ARM::AK_ARMV7L, "7-L", ARMBuildAttrs::CPUArch::v7 }, - { "armv7hl", ARM::AK_ARMV7HL, "7H-L", ARMBuildAttrs::CPUArch::v7 }, - { "armv7s", ARM::AK_ARMV7S, "7-S", ARMBuildAttrs::CPUArch::v7 } + { "iwmmxt", ARM::AK_IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE }, + { "iwmmxt2", ARM::AK_IWMMXT2, "iwmmxt2", "", ARMBuildAttrs::CPUArch::v5TE }, + { "xscale", ARM::AK_XSCALE, "xscale", "", ARMBuildAttrs::CPUArch::v5TE }, + { "armv5", ARM::AK_ARMV5, "5T", "v5", ARMBuildAttrs::CPUArch::v5T }, + { "armv5e", ARM::AK_ARMV5E, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE }, + { "armv6j", ARM::AK_ARMV6J, "6J", "v6", ARMBuildAttrs::CPUArch::v6 }, + { "armv6hl", ARM::AK_ARMV6HL, "6-M", "v6hl", ARMBuildAttrs::CPUArch::v6_M }, + { "armv7", ARM::AK_ARMV7, "7", "v7", ARMBuildAttrs::CPUArch::v7 }, + { "armv7l", ARM::AK_ARMV7L, "7-L", "v7l", ARMBuildAttrs::CPUArch::v7 }, + { "armv7hl", ARM::AK_ARMV7HL, "7-L", "v7hl", ARMBuildAttrs::CPUArch::v7 }, + { "armv7s", ARM::AK_ARMV7S, "7-S", "v7s", ARMBuildAttrs::CPUArch::v7 } }; -// List of canonical ARCH names (use getARCHSynonym) +// List of Arch Extension names. // FIXME: TableGen this. struct { const char *Name; @@ -99,12 +115,19 @@ struct { { "fp", ARM::AEK_FP }, { "idiv", ARM::AEK_HWDIV }, { "mp", ARM::AEK_MP }, + { "simd", ARM::AEK_SIMD }, { "sec", ARM::AEK_SEC }, - { "virt", ARM::AEK_VIRT } + { "virt", ARM::AEK_VIRT }, + { "os", ARM::AEK_OS }, + { "iwmmxt", ARM::AEK_IWMMXT }, + { "iwmmxt2", ARM::AEK_IWMMXT2 }, + { "maverick", ARM::AEK_MAVERICK }, + { "xscale", ARM::AEK_XSCALE } }; // List of CPU names and their arches. // The same CPU can have multiple arches and can be default on multiple arches. // When finding the Arch for a CPU, first-found prevails. Sort them accordingly. +// When this becomes table-generated, we'd probably need two tables. // FIXME: TableGen this. 
struct { const char *Name; @@ -112,9 +135,15 @@ struct { bool Default; } CPUNames[] = { { "arm2", ARM::AK_ARMV2, true }, + { "arm3", ARM::AK_ARMV2A, true }, { "arm6", ARM::AK_ARMV3, true }, { "arm7m", ARM::AK_ARMV3M, true }, + { "arm8", ARM::AK_ARMV4, false }, + { "arm810", ARM::AK_ARMV4, false }, { "strongarm", ARM::AK_ARMV4, true }, + { "strongarm110", ARM::AK_ARMV4, false }, + { "strongarm1100", ARM::AK_ARMV4, false }, + { "strongarm1110", ARM::AK_ARMV4, false }, { "arm7tdmi", ARM::AK_ARMV4T, true }, { "arm7tdmi-s", ARM::AK_ARMV4T, false }, { "arm710t", ARM::AK_ARMV4T, false }, @@ -127,24 +156,21 @@ struct { { "arm9312", ARM::AK_ARMV4T, false }, { "arm940t", ARM::AK_ARMV4T, false }, { "ep9312", ARM::AK_ARMV4T, false }, - { "arm10tdmi", ARM::AK_ARMV5, true }, { "arm10tdmi", ARM::AK_ARMV5T, true }, { "arm1020t", ARM::AK_ARMV5T, false }, - { "xscale", ARM::AK_XSCALE, true }, - { "xscale", ARM::AK_ARMV5TE, false }, { "arm9e", ARM::AK_ARMV5TE, false }, - { "arm926ej-s", ARM::AK_ARMV5TE, false }, - { "arm946ej-s", ARM::AK_ARMV5TE, false }, + { "arm946e-s", ARM::AK_ARMV5TE, false }, { "arm966e-s", ARM::AK_ARMV5TE, false }, { "arm968e-s", ARM::AK_ARMV5TE, false }, + { "arm10e", ARM::AK_ARMV5TE, false }, { "arm1020e", ARM::AK_ARMV5TE, false }, { "arm1022e", ARM::AK_ARMV5TE, true }, { "iwmmxt", ARM::AK_ARMV5TE, false }, - { "iwmmxt", ARM::AK_IWMMXT, true }, + { "xscale", ARM::AK_ARMV5TE, false }, + { "arm926ej-s", ARM::AK_ARMV5TEJ, true }, { "arm1136jf-s", ARM::AK_ARMV6, true }, - { "arm1136j-s", ARM::AK_ARMV6J, true }, - { "arm1136jz-s", ARM::AK_ARMV6J, false }, { "arm1176j-s", ARM::AK_ARMV6K, false }, + { "arm1176jz-s", ARM::AK_ARMV6K, false }, { "mpcore", ARM::AK_ARMV6K, false }, { "mpcorenovfp", ARM::AK_ARMV6K, false }, { "arm1176jzf-s", ARM::AK_ARMV6K, true }, @@ -156,7 +182,6 @@ struct { { "cortex-m0plus", ARM::AK_ARMV6M, false }, { "cortex-m1", ARM::AK_ARMV6M, false }, { "sc000", ARM::AK_ARMV6M, false }, - { "cortex-a8", ARM::AK_ARMV7, true }, { "cortex-a5", ARM::AK_ARMV7A, false }, { "cortex-a7", ARM::AK_ARMV7A, false }, { "cortex-a8", ARM::AK_ARMV7A, true }, @@ -171,18 +196,23 @@ struct { { "cortex-r7", ARM::AK_ARMV7R, false }, { "sc300", ARM::AK_ARMV7M, false }, { "cortex-m3", ARM::AK_ARMV7M, true }, - { "cortex-m4", ARM::AK_ARMV7M, false }, - { "cortex-m7", ARM::AK_ARMV7M, false }, + { "cortex-m4", ARM::AK_ARMV7EM, true }, + { "cortex-m7", ARM::AK_ARMV7EM, false }, { "cortex-a53", ARM::AK_ARMV8A, true }, { "cortex-a57", ARM::AK_ARMV8A, false }, { "cortex-a72", ARM::AK_ARMV8A, false }, { "cyclone", ARM::AK_ARMV8A, false }, { "generic", ARM::AK_ARMV8_1A, true }, // Non-standard Arch names. 
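The comment above the table notes that the same CPU may map to several arches with first-found prevailing, so the ordering (standard entries first, then the non-standard block that continues just below) is load-bearing. A minimal sketch of that lookup discipline over this table, as a hypothetical helper in the same file rather than the parser's actual entry point:

  static unsigned findArchForCPU(llvm::StringRef CPU) {
    for (const auto &C : CPUNames)
      if (CPU == C.Name)
        return C.ID; // first match wins; later duplicates act as fallbacks
    return ARM::AK_INVALID;
  }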
+ { "iwmmxt", ARM::AK_IWMMXT, true }, + { "xscale", ARM::AK_XSCALE, true }, + { "arm10tdmi", ARM::AK_ARMV5, true }, { "arm1022e", ARM::AK_ARMV5E, true }, - { "arm926ej-s", ARM::AK_ARMV5TEJ, true }, + { "arm1136j-s", ARM::AK_ARMV6J, true }, + { "arm1136jz-s", ARM::AK_ARMV6J, false }, { "cortex-m0", ARM::AK_ARMV6SM, true }, { "arm1176jzf-s", ARM::AK_ARMV6HL, true }, + { "cortex-a8", ARM::AK_ARMV7, true }, { "cortex-a8", ARM::AK_ARMV7L, true }, { "cortex-a8", ARM::AK_ARMV7HL, true }, { "cortex-m4", ARM::AK_ARMV7EM, true }, @@ -193,8 +223,6 @@ struct { } // namespace -namespace llvm { - // ======================================================= // // Information by ID // ======================================================= // @@ -205,22 +233,117 @@ const char *ARMTargetParser::getFPUName(unsigned FPUKind) { return FPUNames[FPUKind].Name; } +unsigned ARMTargetParser::getFPUVersion(unsigned FPUKind) { + if (FPUKind >= ARM::FK_LAST) + return 0; + return FPUNames[FPUKind].FPUVersion; +} + +unsigned ARMTargetParser::getFPUNeonSupportLevel(unsigned FPUKind) { + if (FPUKind >= ARM::FK_LAST) + return 0; + return FPUNames[FPUKind].NeonSupport; +} + +unsigned ARMTargetParser::getFPURestriction(unsigned FPUKind) { + if (FPUKind >= ARM::FK_LAST) + return 0; + return FPUNames[FPUKind].Restriction; +} + +bool ARMTargetParser::getFPUFeatures(unsigned FPUKind, + std::vector<const char *> &Features) { + + if (FPUKind >= ARM::FK_LAST || FPUKind == ARM::FK_INVALID) + return false; + + // fp-only-sp and d16 subtarget features are independent of each other, so we + // must enable/disable both. + switch (FPUNames[FPUKind].Restriction) { + case ARM::FR_SP_D16: + Features.push_back("+fp-only-sp"); + Features.push_back("+d16"); + break; + case ARM::FR_D16: + Features.push_back("-fp-only-sp"); + Features.push_back("+d16"); + break; + case ARM::FR_None: + Features.push_back("-fp-only-sp"); + Features.push_back("-d16"); + break; + } + + // FPU version subtarget features are inclusive of lower-numbered ones, so + // enable the one corresponding to this version and disable all that are + // higher. + switch (FPUNames[FPUKind].FPUVersion) { + case 5: + Features.push_back("+fp-armv8"); + break; + case 4: + Features.push_back("+vfp4"); + Features.push_back("-fp-armv8"); + break; + case 3: + Features.push_back("+vfp3"); + Features.push_back("-vfp4"); + Features.push_back("-fp-armv8"); + break; + case 2: + Features.push_back("+vfp2"); + Features.push_back("-vfp3"); + Features.push_back("-vfp4"); + Features.push_back("-fp-armv8"); + break; + case 0: + Features.push_back("-vfp2"); + Features.push_back("-vfp3"); + Features.push_back("-vfp4"); + Features.push_back("-fp-armv8"); + break; + } + + // crypto includes neon, so we handle this similarly to FPU version. 
+ switch (FPUNames[FPUKind].NeonSupport) { + case ARM::NS_Crypto: + Features.push_back("+crypto"); + break; + case ARM::NS_Neon: + Features.push_back("+neon"); + Features.push_back("-crypto"); + break; + case ARM::NS_None: + Features.push_back("-neon"); + Features.push_back("-crypto"); + break; + } + + return true; +} + const char *ARMTargetParser::getArchName(unsigned ArchKind) { if (ArchKind >= ARM::AK_LAST) return nullptr; return ARCHNames[ArchKind].Name; } -const char *ARMTargetParser::getArchDefaultCPUName(unsigned ArchKind) { +const char *ARMTargetParser::getCPUAttr(unsigned ArchKind) { + if (ArchKind >= ARM::AK_LAST) + return nullptr; + return ARCHNames[ArchKind].CPUAttr; +} + +const char *ARMTargetParser::getSubArch(unsigned ArchKind) { if (ArchKind >= ARM::AK_LAST) return nullptr; - return ARCHNames[ArchKind].DefaultCPU; + return ARCHNames[ArchKind].SubArch; } -unsigned ARMTargetParser::getArchDefaultCPUArch(unsigned ArchKind) { +unsigned ARMTargetParser::getArchAttr(unsigned ArchKind) { if (ArchKind >= ARM::AK_LAST) return ARMBuildAttrs::CPUArch::Pre_v4; - return ARCHNames[ArchKind].DefaultArch; + return ARCHNames[ArchKind].ArchAttr; } const char *ARMTargetParser::getArchExtName(unsigned ArchExtKind) { @@ -254,10 +377,9 @@ StringRef ARMTargetParser::getFPUSynonym(StringRef FPU) { .Case("vfp4", "vfpv4") .Case("vfp3-d16", "vfpv3-d16") .Case("vfp4-d16", "vfpv4-d16") - // FIXME: sp-16 is NOT the same as d16 - .Cases("fp4-sp-d16", "fpv4-sp-d16", "vfpv4-d16") + .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16") .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16") - .Cases("fp5-sp-d16", "fpv5-sp-d16", "fpv5-d16") + .Case("fp5-sp-d16", "fpv5-sp-d16") .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16") // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3. .Case("neon-vfpv3", "neon") @@ -266,15 +388,14 @@ StringRef ARMTargetParser::getFPUSynonym(StringRef FPU) { StringRef ARMTargetParser::getArchSynonym(StringRef Arch) { return StringSwitch<StringRef>(Arch) - .Cases("armv6m", "v6m", "armv6-m") - .Cases("armv7a", "v7a", "armv7-a") - .Cases("armv7r", "v7r", "armv7-r") - .Cases("armv7m", "v7m", "armv7-m") - .Cases("armv7em", "v7em", "armv7e-m") - .Cases("armv8", "v8", "armv8-a") - .Cases("armv8a", "v8a", "armv8-a") - .Cases("armv8.1a", "v8.1a", "armv8.1-a") - .Cases("aarch64", "arm64", "armv8-a") + .Case("v6sm", "v6s-m") + .Case("v6m", "v6-m") + .Case("v7a", "v7-a") + .Case("v7r", "v7-r") + .Case("v7m", "v7-m") + .Case("v7em", "v7e-m") + .Cases("v8", "v8a", "aarch64", "arm64", "v8-a") + .Case("v8.1a", "v8.1-a") .Default(Arch); } @@ -342,6 +463,7 @@ unsigned ARMTargetParser::parseFPU(StringRef FPU) { // Allows partial match, ex. "v7a" matches "armv7a". 
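Taken together, the FPUNames table and the three switches in getFPUFeatures above produce a fully explicit +/- feature list. For example, following the table entries for vfpv3-d16 (FPU version 3, NS_None, FR_D16):

  std::vector<const char *> Features;
  bool OK = llvm::ARMTargetParser::getFPUFeatures(llvm::ARM::FK_VFPV3_D16,
                                                  Features);
  // OK == true; Features now holds:
  //   "-fp-only-sp", "+d16"           (FR_D16 restriction)
  //   "+vfp3", "-vfp4", "-fp-armv8"   (version 3 on, higher versions off)
  //   "-neon", "-crypto"              (NS_None)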
unsigned ARMTargetParser::parseArch(StringRef Arch) { + Arch = getCanonicalArchName(Arch); StringRef Syn = getArchSynonym(Arch); for (const auto A : ARCHNames) { if (StringRef(A.Name).endswith(Syn)) @@ -463,5 +585,3 @@ unsigned ARMTargetParser::parseArchVersion(StringRef Arch) { } return 0; } - -} // namespace llvm diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index a63426f88571..ad99386e6574 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetParser.h" +#include "llvm/Support/Host.h" #include <cstring> using namespace llvm; @@ -24,7 +25,8 @@ const char *Triple::getArchTypeName(ArchType Kind) { case aarch64_be: return "aarch64_be"; case arm: return "arm"; case armeb: return "armeb"; - case bpf: return "bpf"; + case bpfel: return "bpfel"; + case bpfeb: return "bpfeb"; case hexagon: return "hexagon"; case mips: return "mips"; case mipsel: return "mipsel"; @@ -89,7 +91,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case amdgcn: case r600: return "amdgpu"; - case bpf: return "bpf"; + case bpfel: + case bpfeb: return "bpf"; case sparcv9: case sparcel: @@ -192,14 +195,30 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) { llvm_unreachable("Invalid EnvironmentType!"); } +static Triple::ArchType parseBPFArch(StringRef ArchName) { + if (ArchName.equals("bpf")) { + if (sys::IsLittleEndianHost) + return Triple::bpfel; + else + return Triple::bpfeb; + } else if (ArchName.equals("bpf_be") || ArchName.equals("bpfeb")) { + return Triple::bpfeb; + } else if (ArchName.equals("bpf_le") || ArchName.equals("bpfel")) { + return Triple::bpfel; + } else { + return Triple::UnknownArch; + } +} + Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { + Triple::ArchType BPFArch(parseBPFArch(Name)); return StringSwitch<Triple::ArchType>(Name) .Case("aarch64", aarch64) .Case("aarch64_be", aarch64_be) .Case("arm64", aarch64) // "arm64" is an alias for "aarch64" .Case("arm", arm) .Case("armeb", armeb) - .Case("bpf", bpf) + .StartsWith("bpf", BPFArch) .Case("mips", mips) .Case("mipsel", mipsel) .Case("mips64", mips64) @@ -296,6 +315,7 @@ static Triple::ArchType parseARMArch(StringRef ArchName) { static Triple::ArchType parseArch(StringRef ArchName) { Triple::ArchType ARMArch(parseARMArch(ArchName)); + Triple::ArchType BPFArch(parseBPFArch(ArchName)); return StringSwitch<Triple::ArchType>(ArchName) .Cases("i386", "i486", "i586", "i686", Triple::x86) @@ -317,7 +337,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("mips64el", Triple::mips64el) .Case("r600", Triple::r600) .Case("amdgcn", Triple::amdgcn) - .Case("bpf", Triple::bpf) + .StartsWith("bpf", BPFArch) .Case("hexagon", Triple::hexagon) .Case("s390x", Triple::systemz) .Case("sparc", Triple::sparc) @@ -702,6 +722,16 @@ std::string Triple::normalize(StringRef Str) { // Special case logic goes here. At this point Arch, Vendor and OS have the // correct values for the computed components. 
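With bpf split into bpfel/bpfeb in the Triple.cpp hunk above, a bare "bpf" arch string now resolves to whichever variant matches the host's endianness, while the suffixed spellings (and the "bpf_le"/"bpf_be" synonyms) stay explicit. Per parseBPFArch:

  // On a little-endian host:
  llvm::Triple::ArchType A =
      llvm::Triple::getArchTypeForLLVMName("bpf");    // Triple::bpfel
  // Explicit on any host:
  llvm::Triple::ArchType B =
      llvm::Triple::getArchTypeForLLVMName("bpfeb");  // Triple::bpfeb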
+ std::string NormalizedEnvironment; + if (Environment == Triple::Android && Components[3].startswith("androideabi")) { + StringRef AndroidVersion = Components[3].drop_front(strlen("androideabi")); + if (AndroidVersion.empty()) { + Components[3] = "android"; + } else { + NormalizedEnvironment = Twine("android", AndroidVersion).str(); + Components[3] = NormalizedEnvironment; + } + } if (OS == Triple::Win32) { Components.resize(4); @@ -779,41 +809,47 @@ static unsigned EatNumber(StringRef &Str) { return Result; } -void Triple::getOSVersion(unsigned &Major, unsigned &Minor, - unsigned &Micro) const { - StringRef OSName = getOSName(); - - // For Android, we care about the Android version rather than the Linux - // version. - if (getEnvironment() == Android) { - OSName = getEnvironmentName().substr(strlen("android")); - if (OSName.startswith("eabi")) - OSName = OSName.substr(strlen("eabi")); - } - - // Assume that the OS portion of the triple starts with the canonical name. - StringRef OSTypeName = getOSTypeName(getOS()); - if (OSName.startswith(OSTypeName)) - OSName = OSName.substr(OSTypeName.size()); - +static void parseVersionFromName(StringRef Name, unsigned &Major, + unsigned &Minor, unsigned &Micro) { // Any unset version defaults to 0. Major = Minor = Micro = 0; // Parse up to three components. - unsigned *Components[3] = { &Major, &Minor, &Micro }; + unsigned *Components[3] = {&Major, &Minor, &Micro}; for (unsigned i = 0; i != 3; ++i) { - if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') + if (Name.empty() || Name[0] < '0' || Name[0] > '9') break; // Consume the leading number. - *Components[i] = EatNumber(OSName); + *Components[i] = EatNumber(Name); // Consume the separator, if present. - if (OSName.startswith(".")) - OSName = OSName.substr(1); + if (Name.startswith(".")) + Name = Name.substr(1); } } +void Triple::getEnvironmentVersion(unsigned &Major, unsigned &Minor, + unsigned &Micro) const { + StringRef EnvironmentName = getEnvironmentName(); + StringRef EnvironmentTypeName = getEnvironmentTypeName(getEnvironment()); + if (EnvironmentName.startswith(EnvironmentTypeName)) + EnvironmentName = EnvironmentName.substr(EnvironmentTypeName.size()); + + parseVersionFromName(EnvironmentName, Major, Minor, Micro); +} + +void Triple::getOSVersion(unsigned &Major, unsigned &Minor, + unsigned &Micro) const { + StringRef OSName = getOSName(); + // Assume that the OS portion of the triple starts with the canonical name. 
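The net effect of the Android special case plus the new getEnvironmentVersion is that an API level embedded in the environment component survives normalization and becomes queryable. A sketch, assuming the environment parser already maps any "android"-prefixed spelling to Triple::Android:

  llvm::Triple T(llvm::Triple::normalize("armv7a-unknown-linux-androideabi21"));
  // The environment component is rewritten to "android21" by the hunk above.
  unsigned Major, Minor, Micro;
  T.getEnvironmentVersion(Major, Minor, Micro);
  // Major == 21, Minor == 0, Micro == 0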
+ StringRef OSTypeName = getOSTypeName(getOS()); + if (OSName.startswith(OSTypeName)) + OSName = OSName.substr(OSTypeName.size()); + + parseVersionFromName(OSName, Major, Minor, Micro); +} + bool Triple::getMacOSXVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const { getOSVersion(Major, Minor, Micro); @@ -973,7 +1009,8 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::amdgcn: - case llvm::Triple::bpf: + case llvm::Triple::bpfel: + case llvm::Triple::bpfeb: case llvm::Triple::le64: case llvm::Triple::mips64: case llvm::Triple::mips64el: @@ -1010,7 +1047,8 @@ Triple Triple::get32BitArchVariant() const { case Triple::aarch64: case Triple::aarch64_be: case Triple::amdgcn: - case Triple::bpf: + case Triple::bpfel: + case Triple::bpfeb: case Triple::msp430: case Triple::systemz: case Triple::ppc64le: @@ -1074,7 +1112,8 @@ Triple Triple::get64BitArchVariant() const { case Triple::aarch64: case Triple::aarch64_be: - case Triple::bpf: + case Triple::bpfel: + case Triple::bpfeb: case Triple::le64: case Triple::amdil64: case Triple::amdgcn: @@ -1108,13 +1147,13 @@ Triple Triple::get64BitArchVariant() const { const char *Triple::getARMCPUForArch(StringRef MArch) const { if (MArch.empty()) MArch = getArchName(); + MArch = ARMTargetParser::getCanonicalArchName(MArch); // Some defaults are forced. switch (getOS()) { case llvm::Triple::FreeBSD: case llvm::Triple::NetBSD: - // FIXME: This doesn't work on BE/thumb variants. - if (MArch == "armv6") + if (!MArch.empty() && MArch == "v6") return "arm1176jzf-s"; break; case llvm::Triple::Win32: @@ -1124,7 +1163,6 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const { break; } - MArch = ARMTargetParser::getCanonicalArchName(MArch); if (MArch.empty()) return nullptr; diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp index 90f34f6e232d..6b59a16514b1 100644 --- a/lib/Support/YAMLTraits.cpp +++ b/lib/Support/YAMLTraits.cpp @@ -97,6 +97,10 @@ bool Input::nextDocument() { return ++DocIterator != Strm->end(); } +const Node *Input::getCurrentNode() const { + return CurrentNode ? 
CurrentNode->_node : nullptr; +} + bool Input::mapTag(StringRef Tag, bool Default) { std::string foundTag = CurrentNode->_node->getVerbatimTag(); if (foundTag.empty()) { @@ -400,9 +404,10 @@ bool Input::canElideEmptySequence() { // Output //===----------------------------------------------------------------------===// -Output::Output(raw_ostream &yout, void *context) +Output::Output(raw_ostream &yout, void *context, int WrapColumn) : IO(context), Out(yout), + WrapColumn(WrapColumn), Column(0), ColumnAtFlowStart(0), ColumnAtMapFlowStart(0), @@ -525,7 +530,7 @@ void Output::endFlowSequence() { bool Output::preflightFlowElement(unsigned, void *&) { if (NeedFlowSequenceComma) output(", "); - if (Column > 70) { + if (WrapColumn && Column > WrapColumn) { output("\n"); for (int i = 0; i < ColumnAtFlowStart; ++i) output(" "); @@ -716,7 +721,7 @@ void Output::paddedKey(StringRef key) { void Output::flowKey(StringRef Key) { if (StateStack.back() == inFlowMapOtherKey) output(", "); - if (Column > 70) { + if (WrapColumn && Column > WrapColumn) { output("\n"); for (int I = 0; I < ColumnAtMapFlowStart; ++I) output(" "); diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 4c0b6c7b5634..42f830bbf0fa 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -815,7 +815,7 @@ void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { } uint64_t raw_svector_ostream::current_pos() const { - return OS.size(); + return OS.size(); } StringRef raw_svector_ostream::str() { diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 9783922b966d..97e796c1349a 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -86,7 +86,6 @@ IntRecTy IntRecTy::Shared; StringRecTy StringRecTy::Shared; DagRecTy DagRecTy::Shared; -void RecTy::anchor() { } void RecTy::dump() const { print(errs()); } ListRecTy *RecTy::getListTy() { @@ -95,47 +94,15 @@ ListRecTy *RecTy::getListTy() { return ListTy.get(); } -bool RecTy::baseClassOf(const RecTy *RHS) const { - assert (RHS && "NULL pointer"); +bool RecTy::typeIsConvertibleTo(const RecTy *RHS) const { + assert(RHS && "NULL pointer"); return Kind == RHS->getRecTyKind(); } -Init *BitRecTy::convertValue(BitsInit *BI) { - if (BI->getNumBits() != 1) return nullptr; // Only accept if just one bit! - return BI->getBit(0); -} - -Init *BitRecTy::convertValue(IntInit *II) { - int64_t Val = II->getValue(); - if (Val != 0 && Val != 1) return nullptr; // Only accept 0 or 1 for a bit! - - return BitInit::get(Val != 0); -} - -Init *BitRecTy::convertValue(TypedInit *TI) { - RecTy *Ty = TI->getType(); - if (isa<BitRecTy>(Ty)) - return TI; // Accept variable if it is already of bit type! - if (auto *BitsTy = dyn_cast<BitsRecTy>(Ty)) - // Accept only bits<1> expression. - return BitsTy->getNumBits() == 1 ? TI : nullptr; - // Ternary !if can be converted to bit, but only if both sides are - // convertible to a bit. 
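In the YAMLTraits.cpp hunk above, the flow-wrapping column stops being a hard-coded 70 and becomes a constructor parameter, and the new "WrapColumn &&" guard means 0 disables wrapping outright. A sketch, assuming the header gives the parameter a default that preserves the old behaviour:

  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  // Never wrap flow sequences/maps, however long the line grows:
  llvm::yaml::Output Yout(OS, /*Ctxt=*/nullptr, /*WrapColumn=*/0);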
- if (TernOpInit *TOI = dyn_cast<TernOpInit>(TI)) { - if (TOI->getOpcode() != TernOpInit::TernaryOp::IF) - return nullptr; - if (!TOI->getMHS()->convertInitializerTo(BitRecTy::get()) || - !TOI->getRHS()->convertInitializerTo(BitRecTy::get())) - return nullptr; - return TOI; - } - return nullptr; -} - -bool BitRecTy::baseClassOf(const RecTy *RHS) const{ - if(RecTy::baseClassOf(RHS) || RHS->getRecTyKind() == IntRecTyKind) +bool BitRecTy::typeIsConvertibleTo(const RecTy *RHS) const{ + if (RecTy::typeIsConvertibleTo(RHS) || RHS->getRecTyKind() == IntRecTyKind) return true; - if(const BitsRecTy *BitsTy = dyn_cast<BitsRecTy>(RHS)) + if (const BitsRecTy *BitsTy = dyn_cast<BitsRecTy>(RHS)) return BitsTy->getNumBits() == 1; return false; } @@ -154,193 +121,34 @@ std::string BitsRecTy::getAsString() const { return "bits<" + utostr(Size) + ">"; } -Init *BitsRecTy::convertValue(UnsetInit *UI) { - SmallVector<Init *, 16> NewBits(Size); - - for (unsigned i = 0; i != Size; ++i) - NewBits[i] = UnsetInit::get(); - - return BitsInit::get(NewBits); -} - -Init *BitsRecTy::convertValue(BitInit *BI) { - if (Size != 1) return nullptr; // Can only convert single bit. - return BitsInit::get(BI); -} - -/// canFitInBitfield - Return true if the number of bits is large enough to hold -/// the integer value. -static bool canFitInBitfield(int64_t Value, unsigned NumBits) { - // For example, with NumBits == 4, we permit Values from [-7 .. 15]. - return (NumBits >= sizeof(Value) * 8) || - (Value >> NumBits == 0) || (Value >> (NumBits-1) == -1); -} - -/// convertValue from Int initializer to bits type: Split the integer up into the -/// appropriate bits. -/// -Init *BitsRecTy::convertValue(IntInit *II) { - int64_t Value = II->getValue(); - // Make sure this bitfield is large enough to hold the integer value. - if (!canFitInBitfield(Value, Size)) - return nullptr; - - SmallVector<Init *, 16> NewBits(Size); - - for (unsigned i = 0; i != Size; ++i) - NewBits[i] = BitInit::get(Value & (1LL << i)); - - return BitsInit::get(NewBits); -} - -Init *BitsRecTy::convertValue(BitsInit *BI) { - // If the number of bits is right, return it. Otherwise we need to expand or - // truncate. 
- if (BI->getNumBits() == Size) return BI; - return nullptr; -} - -Init *BitsRecTy::convertValue(TypedInit *TI) { - if (Size == 1 && isa<BitRecTy>(TI->getType())) - return BitsInit::get(TI); - - if (TI->getType()->typeIsConvertibleTo(this)) { - SmallVector<Init *, 16> NewBits(Size); - - for (unsigned i = 0; i != Size; ++i) - NewBits[i] = VarBitInit::get(TI, i); - return BitsInit::get(NewBits); - } - - return nullptr; -} - -bool BitsRecTy::baseClassOf(const RecTy *RHS) const{ - if (RecTy::baseClassOf(RHS)) //argument and the receiver are the same type +bool BitsRecTy::typeIsConvertibleTo(const RecTy *RHS) const { + if (RecTy::typeIsConvertibleTo(RHS)) //argument and the sender are same type return cast<BitsRecTy>(RHS)->Size == Size; RecTyKind kind = RHS->getRecTyKind(); return (kind == BitRecTyKind && Size == 1) || (kind == IntRecTyKind); } -Init *IntRecTy::convertValue(BitInit *BI) { - return IntInit::get(BI->getValue()); -} - -Init *IntRecTy::convertValue(BitsInit *BI) { - int64_t Result = 0; - for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) - if (BitInit *Bit = dyn_cast<BitInit>(BI->getBit(i))) - Result |= static_cast<int64_t>(Bit->getValue()) << i; - else - return nullptr; - return IntInit::get(Result); -} - -Init *IntRecTy::convertValue(TypedInit *TI) { - if (TI->getType()->typeIsConvertibleTo(this)) - return TI; // Accept variable if already of the right type! - return nullptr; -} - -bool IntRecTy::baseClassOf(const RecTy *RHS) const{ +bool IntRecTy::typeIsConvertibleTo(const RecTy *RHS) const { RecTyKind kind = RHS->getRecTyKind(); return kind==BitRecTyKind || kind==BitsRecTyKind || kind==IntRecTyKind; } -Init *StringRecTy::convertValue(UnOpInit *UO) { - if (UO->getOpcode() == UnOpInit::CAST) { - Init *L = UO->getOperand()->convertInitializerTo(this); - if (!L) return nullptr; - if (L != UO->getOperand()) - return UnOpInit::get(UnOpInit::CAST, L, StringRecTy::get()); - return UO; - } - - return convertValue((TypedInit*)UO); -} - -Init *StringRecTy::convertValue(BinOpInit *BO) { - if (BO->getOpcode() == BinOpInit::STRCONCAT) { - Init *L = BO->getLHS()->convertInitializerTo(this); - Init *R = BO->getRHS()->convertInitializerTo(this); - if (!L || !R) return nullptr; - if (L != BO->getLHS() || R != BO->getRHS()) - return BinOpInit::get(BinOpInit::STRCONCAT, L, R, StringRecTy::get()); - return BO; - } - - return convertValue((TypedInit*)BO); -} - - -Init *StringRecTy::convertValue(TypedInit *TI) { - if (isa<StringRecTy>(TI->getType())) - return TI; // Accept variable if already of the right type! - return nullptr; +std::string StringRecTy::getAsString() const { + return "string"; } std::string ListRecTy::getAsString() const { return "list<" + Ty->getAsString() + ">"; } -Init *ListRecTy::convertValue(ListInit *LI) { - std::vector<Init*> Elements; - - // Verify that all of the elements of the list are subclasses of the - // appropriate class! - for (unsigned i = 0, e = LI->getSize(); i != e; ++i) - if (Init *CI = LI->getElement(i)->convertInitializerTo(Ty)) - Elements.push_back(CI); - else - return nullptr; - - if (!isa<ListRecTy>(LI->getType())) - return nullptr; - - return ListInit::get(Elements, this); -} - -Init *ListRecTy::convertValue(TypedInit *TI) { - // Ensure that TI is compatible with our class. 
- if (ListRecTy *LRT = dyn_cast<ListRecTy>(TI->getType())) - if (LRT->getElementType()->typeIsConvertibleTo(getElementType())) - return TI; - return nullptr; -} - -bool ListRecTy::baseClassOf(const RecTy *RHS) const{ - if(const ListRecTy* ListTy = dyn_cast<ListRecTy>(RHS)) - return ListTy->getElementType()->typeIsConvertibleTo(Ty); +bool ListRecTy::typeIsConvertibleTo(const RecTy *RHS) const { + if (const auto *ListTy = dyn_cast<ListRecTy>(RHS)) + return Ty->typeIsConvertibleTo(ListTy->getElementType()); return false; } -Init *DagRecTy::convertValue(TypedInit *TI) { - if (TI->getType()->typeIsConvertibleTo(this)) - return TI; - return nullptr; -} - -Init *DagRecTy::convertValue(UnOpInit *UO) { - if (UO->getOpcode() == UnOpInit::CAST) { - Init *L = UO->getOperand()->convertInitializerTo(this); - if (!L) return nullptr; - if (L != UO->getOperand()) - return UnOpInit::get(UnOpInit::CAST, L, DagRecTy::get()); - return UO; - } - return nullptr; -} - -Init *DagRecTy::convertValue(BinOpInit *BO) { - if (BO->getOpcode() == BinOpInit::CONCAT) { - Init *L = BO->getLHS()->convertInitializerTo(this); - Init *R = BO->getRHS()->convertInitializerTo(this); - if (!L || !R) return nullptr; - if (L != BO->getLHS() || R != BO->getRHS()) - return BinOpInit::get(BinOpInit::CONCAT, L, R, DagRecTy::get()); - return BO; - } - return nullptr; +std::string DagRecTy::getAsString() const { + return "dag"; } RecordRecTy *RecordRecTy::get(Record *R) { @@ -351,33 +159,16 @@ std::string RecordRecTy::getAsString() const { return Rec->getName(); } -Init *RecordRecTy::convertValue(DefInit *DI) { - // Ensure that DI is a subclass of Rec. - if (!DI->getDef()->isSubClassOf(Rec)) - return nullptr; - return DI; -} - -Init *RecordRecTy::convertValue(TypedInit *TI) { - // Ensure that TI is compatible with Rec. - if (RecordRecTy *RRT = dyn_cast<RecordRecTy>(TI->getType())) - if (RRT->getRecord()->isSubClassOf(getRecord()) || - RRT->getRecord() == getRecord()) - return TI; - return nullptr; -} - -bool RecordRecTy::baseClassOf(const RecTy *RHS) const{ +bool RecordRecTy::typeIsConvertibleTo(const RecTy *RHS) const { const RecordRecTy *RTy = dyn_cast<RecordRecTy>(RHS); if (!RTy) return false; - if (Rec == RTy->getRecord() || RTy->getRecord()->isSubClassOf(Rec)) + if (RTy->getRecord() == Rec || Rec->isSubClassOf(RTy->getRecord())) return true; - const std::vector<Record*> &SC = Rec->getSuperClasses(); - for (unsigned i = 0, e = SC.size(); i != e; ++i) - if (RTy->getRecord()->isSubClassOf(SC[i])) + for (Record *SC : RTy->getRecord()->getSuperClasses()) + if (Rec->isSubClassOf(SC)) return true; return false; @@ -422,14 +213,24 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) { void Init::anchor() { } void Init::dump() const { return print(errs()); } -void UnsetInit::anchor() { } - UnsetInit *UnsetInit::get() { static UnsetInit TheInit; return &TheInit; } -void BitInit::anchor() { } +Init *UnsetInit::convertInitializerTo(RecTy *Ty) const { + if (auto *BRT = dyn_cast<BitsRecTy>(Ty)) { + SmallVector<Init *, 16> NewBits(BRT->getNumBits()); + + for (unsigned i = 0; i != BRT->getNumBits(); ++i) + NewBits[i] = UnsetInit::get(); + + return BitsInit::get(NewBits); + } + + // All other types can just be returned. + return const_cast<UnsetInit *>(this); +} BitInit *BitInit::get(bool V) { static BitInit True(true); @@ -438,6 +239,22 @@ BitInit *BitInit::get(bool V) { return V ? 
&True : &False; } +Init *BitInit::convertInitializerTo(RecTy *Ty) const { + if (isa<BitRecTy>(Ty)) + return const_cast<BitInit *>(this); + + if (isa<IntRecTy>(Ty)) + return IntInit::get(getValue()); + + if (auto *BRT = dyn_cast<BitsRecTy>(Ty)) { + // Can only convert single bit. + if (BRT->getNumBits() == 1) + return BitsInit::get(const_cast<BitInit *>(this)); + } + + return nullptr; +} + static void ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef<Init *> Range) { ID.AddInteger(Range.size()); @@ -467,6 +284,32 @@ void BitsInit::Profile(FoldingSetNodeID &ID) const { ProfileBitsInit(ID, Bits); } +Init *BitsInit::convertInitializerTo(RecTy *Ty) const { + if (isa<BitRecTy>(Ty)) { + if (getNumBits() != 1) return nullptr; // Only accept if just one bit! + return getBit(0); + } + + if (auto *BRT = dyn_cast<BitsRecTy>(Ty)) { + // If the number of bits is right, return it. Otherwise we need to expand + // or truncate. + if (getNumBits() != BRT->getNumBits()) return nullptr; + return const_cast<BitsInit *>(this); + } + + if (isa<IntRecTy>(Ty)) { + int64_t Result = 0; + for (unsigned i = 0, e = getNumBits(); i != e; ++i) + if (auto *Bit = dyn_cast<BitInit>(getBit(i))) + Result |= static_cast<int64_t>(Bit->getValue()) << i; + else + return nullptr; + return IntInit::get(Result); + } + + return nullptr; +} + Init * BitsInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const { SmallVector<Init *, 16> NewBits(Bits.size()); @@ -560,6 +403,40 @@ std::string IntInit::getAsString() const { return itostr(Value); } +/// canFitInBitfield - Return true if the number of bits is large enough to hold +/// the integer value. +static bool canFitInBitfield(int64_t Value, unsigned NumBits) { + // For example, with NumBits == 4, we permit Values from [-7 .. 15]. + return (NumBits >= sizeof(Value) * 8) || + (Value >> NumBits == 0) || (Value >> (NumBits-1) == -1); +} + +Init *IntInit::convertInitializerTo(RecTy *Ty) const { + if (isa<IntRecTy>(Ty)) + return const_cast<IntInit *>(this); + + if (isa<BitRecTy>(Ty)) { + int64_t Val = getValue(); + if (Val != 0 && Val != 1) return nullptr; // Only accept 0 or 1 for a bit! + return BitInit::get(Val != 0); + } + + if (auto *BRT = dyn_cast<BitsRecTy>(Ty)) { + int64_t Value = getValue(); + // Make sure this bitfield is large enough to hold the integer value. 
+ if (!canFitInBitfield(Value, BRT->getNumBits())) + return nullptr; + + SmallVector<Init *, 16> NewBits(BRT->getNumBits()); + for (unsigned i = 0; i != BRT->getNumBits(); ++i) + NewBits[i] = BitInit::get(Value & (1LL << i)); + + return BitsInit::get(NewBits); + } + + return nullptr; +} + Init * IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const { SmallVector<Init *, 16> NewBits(Bits.size()); @@ -573,8 +450,6 @@ IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const { return BitsInit::get(NewBits); } -void StringInit::anchor() { } - StringInit *StringInit::get(StringRef V) { static StringMap<std::unique_ptr<StringInit>> ThePool; @@ -583,6 +458,13 @@ StringInit *StringInit::get(StringRef V) { return I.get(); } +Init *StringInit::convertInitializerTo(RecTy *Ty) const { + if (isa<StringRecTy>(Ty)) + return const_cast<StringInit *>(this); + + return nullptr; +} + static void ProfileListInit(FoldingSetNodeID &ID, ArrayRef<Init *> Range, RecTy *EltTy) { @@ -616,11 +498,30 @@ void ListInit::Profile(FoldingSetNodeID &ID) const { ProfileListInit(ID, Values, EltTy); } +Init *ListInit::convertInitializerTo(RecTy *Ty) const { + if (auto *LRT = dyn_cast<ListRecTy>(Ty)) { + std::vector<Init*> Elements; + + // Verify that all of the elements of the list are subclasses of the + // appropriate class! + for (Init *I : getValues()) + if (Init *CI = I->convertInitializerTo(LRT->getElementType())) + Elements.push_back(CI); + else + return nullptr; + + if (isa<ListRecTy>(getType())) + return ListInit::get(Elements, Ty); + } + + return nullptr; +} + Init * ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) const { std::vector<Init*> Vals; for (unsigned i = 0, e = Elements.size(); i != e; ++i) { - if (Elements[i] >= getSize()) + if (Elements[i] >= size()) return nullptr; Vals.push_back(getElement(Elements[i])); } @@ -637,12 +538,11 @@ Record *ListInit::getElementAsRecord(unsigned i) const { Init *ListInit::resolveReferences(Record &R, const RecordVal *RV) const { std::vector<Init*> Resolved; - Resolved.reserve(getSize()); + Resolved.reserve(size()); bool Changed = false; - for (unsigned i = 0, e = getSize(); i != e; ++i) { + for (Init *CurElt : getValues()) { Init *E; - Init *CurElt = getElement(i); do { E = CurElt; @@ -659,7 +559,7 @@ Init *ListInit::resolveReferences(Record &R, const RecordVal *RV) const { Init *ListInit::resolveListElementReference(Record &R, const RecordVal *IRV, unsigned Elt) const { - if (Elt >= getSize()) + if (Elt >= size()) return nullptr; // Out of range reference. 
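The refactor running through Record.cpp inverts the old conversion protocol: rather than each RecTy knowing how to convert every flavour of Init (RecTy::convertValue), each Init now converts itself toward a requested type (Init::convertInitializerTo), and baseClassOf becomes typeIsConvertibleTo. A worked call against the IntInit case above:

  llvm::Init *Five = llvm::IntInit::get(5);
  llvm::Init *AsBits = Five->convertInitializerTo(llvm::BitsRecTy::get(4));
  // canFitInBitfield(5, 4) holds (5 >> 4 == 0), so AsBits is the BitsInit
  // {1, 0, 1, 0} (least significant bit first); 16 would yield nullptr,
  // since it needs five bits.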
Init *E = getElement(Elt); // If the element is set to some value, or if we are resolving a reference @@ -717,12 +617,12 @@ UnOpInit *UnOpInit::get(UnaryOp opc, Init *lhs, RecTy *Type) { Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { switch (getOpcode()) { case CAST: { - if (getType()->getAsString() == "string") { + if (isa<StringRecTy>(getType())) { if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) return LHSs; if (DefInit *LHSd = dyn_cast<DefInit>(LHS)) - return StringInit::get(LHSd->getDef()->getName()); + return StringInit::get(LHSd->getAsString()); if (IntInit *LHSi = dyn_cast<IntInit>(LHS)) return StringInit::get(LHSi->getAsString()); @@ -987,11 +887,11 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg, MultiClass *CurMultiClass) { // If this is a dag, recurse if (auto *TArg = dyn_cast<TypedInit>(Arg)) - if (TArg->getType()->getAsString() == "dag") + if (isa<DagRecTy>(TArg->getType())) return ForeachHelper(LHS, Arg, RHSo, Type, CurRec, CurMultiClass); std::vector<Init *> NewOperands; - for (int i = 0; i < RHSo->getNumOperands(); ++i) { + for (unsigned i = 0; i < RHSo->getNumOperands(); ++i) { if (auto *RHSoo = dyn_cast<OpInit>(RHSo->getOperand(i))) { if (Init *Result = EvaluateOperation(RHSoo, LHS, Arg, Type, CurRec, CurMultiClass)) @@ -1013,8 +913,6 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg, static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, Record *CurRec, MultiClass *CurMultiClass) { - DagInit *MHSd = dyn_cast<DagInit>(MHS); - ListInit *MHSl = dyn_cast<ListInit>(MHS); OpInit *RHSo = dyn_cast<OpInit>(RHS); @@ -1026,55 +924,52 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, if (!LHSt) PrintFatalError(CurRec->getLoc(), "!foreach requires typed variable\n"); - if ((MHSd && isa<DagRecTy>(Type)) || (MHSl && isa<ListRecTy>(Type))) { - if (MHSd) { - Init *Val = MHSd->getOperator(); - Init *Result = EvaluateOperation(RHSo, LHS, Val, - Type, CurRec, CurMultiClass); - if (Result) - Val = Result; - - std::vector<std::pair<Init *, std::string> > args; - for (unsigned int i = 0; i < MHSd->getNumArgs(); ++i) { - Init *Arg; - std::string ArgName; - Arg = MHSd->getArg(i); - ArgName = MHSd->getArgName(i); - - // Process args - Init *Result = EvaluateOperation(RHSo, LHS, Arg, Type, - CurRec, CurMultiClass); - if (Result) - Arg = Result; - - // TODO: Process arg names - args.push_back(std::make_pair(Arg, ArgName)); - } - - return DagInit::get(Val, "", args); + DagInit *MHSd = dyn_cast<DagInit>(MHS); + if (MHSd && isa<DagRecTy>(Type)) { + Init *Val = MHSd->getOperator(); + if (Init *Result = EvaluateOperation(RHSo, LHS, Val, + Type, CurRec, CurMultiClass)) + Val = Result; + + std::vector<std::pair<Init *, std::string> > args; + for (unsigned int i = 0; i < MHSd->getNumArgs(); ++i) { + Init *Arg = MHSd->getArg(i); + std::string ArgName = MHSd->getArgName(i); + + // Process args + if (Init *Result = EvaluateOperation(RHSo, LHS, Arg, Type, + CurRec, CurMultiClass)) + Arg = Result; + + // TODO: Process arg names + args.push_back(std::make_pair(Arg, ArgName)); } - if (MHSl) { - std::vector<Init *> NewOperands; - std::vector<Init *> NewList(MHSl->begin(), MHSl->end()); - - for (Init *&Item : NewList) { - NewOperands.clear(); - for(int i = 0; i < RHSo->getNumOperands(); ++i) { - // First, replace the foreach variable with the list item - if (LHS->getAsString() == RHSo->getOperand(i)->getAsString()) - NewOperands.push_back(Item); - else - NewOperands.push_back(RHSo->getOperand(i)); - } - // 
Now run the operator and use its result as the new list item - const OpInit *NewOp = RHSo->clone(NewOperands); - Init *NewItem = NewOp->Fold(CurRec, CurMultiClass); - if (NewItem != NewOp) - Item = NewItem; + return DagInit::get(Val, "", args); + } + + ListInit *MHSl = dyn_cast<ListInit>(MHS); + if (MHSl && isa<ListRecTy>(Type)) { + std::vector<Init *> NewOperands; + std::vector<Init *> NewList(MHSl->begin(), MHSl->end()); + + for (Init *&Item : NewList) { + NewOperands.clear(); + for(unsigned i = 0; i < RHSo->getNumOperands(); ++i) { + // First, replace the foreach variable with the list item + if (LHS->getAsString() == RHSo->getOperand(i)->getAsString()) + NewOperands.push_back(Item); + else + NewOperands.push_back(RHSo->getOperand(i)); } - return ListInit::get(NewList, MHSl->getType()); + + // Now run the operator and use its result as the new list item + const OpInit *NewOp = RHSo->clone(NewOperands); + Init *NewItem = NewOp->Fold(CurRec, CurMultiClass); + if (NewItem != NewOp) + Item = NewItem; } + return ListInit::get(NewList, MHSl->getType()); } return nullptr; } @@ -1196,6 +1091,82 @@ RecTy *TypedInit::getFieldType(const std::string &FieldName) const { } Init * +TypedInit::convertInitializerTo(RecTy *Ty) const { + if (isa<IntRecTy>(Ty)) { + if (getType()->typeIsConvertibleTo(Ty)) + return const_cast<TypedInit *>(this); + return nullptr; + } + + if (isa<StringRecTy>(Ty)) { + if (isa<StringRecTy>(getType())) + return const_cast<TypedInit *>(this); + return nullptr; + } + + if (isa<BitRecTy>(Ty)) { + // Accept variable if it is already of bit type! + if (isa<BitRecTy>(getType())) + return const_cast<TypedInit *>(this); + if (auto *BitsTy = dyn_cast<BitsRecTy>(getType())) { + // Accept only bits<1> expression. + if (BitsTy->getNumBits() == 1) + return const_cast<TypedInit *>(this); + return nullptr; + } + // Ternary !if can be converted to bit, but only if both sides are + // convertible to a bit. + if (const auto *TOI = dyn_cast<TernOpInit>(this)) { + if (TOI->getOpcode() == TernOpInit::TernaryOp::IF && + TOI->getMHS()->convertInitializerTo(BitRecTy::get()) && + TOI->getRHS()->convertInitializerTo(BitRecTy::get())) + return const_cast<TypedInit *>(this); + return nullptr; + } + return nullptr; + } + + if (auto *BRT = dyn_cast<BitsRecTy>(Ty)) { + if (BRT->getNumBits() == 1 && isa<BitRecTy>(getType())) + return BitsInit::get(const_cast<TypedInit *>(this)); + + if (getType()->typeIsConvertibleTo(BRT)) { + SmallVector<Init *, 16> NewBits(BRT->getNumBits()); + + for (unsigned i = 0; i != BRT->getNumBits(); ++i) + NewBits[i] = VarBitInit::get(const_cast<TypedInit *>(this), i); + return BitsInit::get(NewBits); + } + + return nullptr; + } + + if (auto *DLRT = dyn_cast<ListRecTy>(Ty)) { + if (auto *SLRT = dyn_cast<ListRecTy>(getType())) + if (SLRT->getElementType()->typeIsConvertibleTo(DLRT->getElementType())) + return const_cast<TypedInit *>(this); + return nullptr; + } + + if (auto *DRT = dyn_cast<DagRecTy>(Ty)) { + if (getType()->typeIsConvertibleTo(DRT)) + return const_cast<TypedInit *>(this); + return nullptr; + } + + if (auto *SRRT = dyn_cast<RecordRecTy>(Ty)) { + // Ensure that this is compatible with Rec. 
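One subtlety carried over into TypedInit::convertInitializerTo above: converting a typed but still-unresolved value to bits<N> does not evaluate it, it wraps each bit in a VarBitInit to be resolved later. A sketch:

  llvm::Init *X = llvm::VarInit::get("X", llvm::IntRecTy::get());
  llvm::Init *B = X->convertInitializerTo(llvm::BitsRecTy::get(2));
  // B is a BitsInit holding the VarBitInits X{0} and X{1}, not concrete bits.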
+ if (RecordRecTy *DRRT = dyn_cast<RecordRecTy>(getType())) + if (DRRT->getRecord()->isSubClassOf(SRRT->getRecord()) || + DRRT->getRecord() == SRRT->getRecord()) + return const_cast<TypedInit *>(this); + return nullptr; + } + + return nullptr; +} + +Init * TypedInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const { BitsRecTy *T = dyn_cast<BitsRecTy>(getType()); if (!T) return nullptr; // Cannot subscript a non-bits variable. @@ -1267,7 +1238,7 @@ Init *VarInit::resolveListElementReference(Record &R, if (!LI) return VarListElementInit::get(cast<TypedInit>(RV->getValue()), Elt); - if (Elt >= LI->getSize()) + if (Elt >= LI->size()) return nullptr; // Out of range reference. Init *E = LI->getElement(Elt); // If the element is set to some value, or if we are resolving a reference @@ -1324,6 +1295,13 @@ VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) { return I.get(); } +Init *VarBitInit::convertInitializerTo(RecTy *Ty) const { + if (isa<BitRecTy>(Ty)) + return const_cast<VarBitInit *>(this); + + return nullptr; +} + std::string VarBitInit::getAsString() const { return TI->getAsString() + "{" + utostr(Bit) + "}"; } @@ -1371,8 +1349,8 @@ Init *VarListElementInit:: resolveListElementReference(Record &R, unsigned Elt) const { if (Init *Result = TI->resolveListElementReference(R, RV, Element)) { if (TypedInit *TInit = dyn_cast<TypedInit>(Result)) { - Init *Result2 = TInit->resolveListElementReference(R, RV, Elt); - if (Result2) return Result2; + if (Init *Result2 = TInit->resolveListElementReference(R, RV, Elt)) + return Result2; return VarListElementInit::get(TInit, Elt); } return Result; @@ -1385,6 +1363,13 @@ DefInit *DefInit::get(Record *R) { return R->getDefInit(); } +Init *DefInit::convertInitializerTo(RecTy *Ty) const { + if (auto *RRT = dyn_cast<RecordRecTy>(Ty)) + if (getDef()->isSubClassOf(RRT->getRecord())) + return const_cast<DefInit *>(this); + return nullptr; +} + RecTy *DefInit::getFieldType(const std::string &FieldName) const { if (const RecordVal *RV = Def->getValue(FieldName)) return RV->getType(); @@ -1422,7 +1407,7 @@ Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV, unsigned Elt) const { if (Init *ListVal = Rec->getFieldInit(R, RV, FieldName)) if (ListInit *LI = dyn_cast<ListInit>(ListVal)) { - if (Elt >= LI->getSize()) return nullptr; + if (Elt >= LI->size()) return nullptr; Init *E = LI->getElement(Elt); // If the element is set to some value, or if we are resolving a @@ -1501,6 +1486,13 @@ void DagInit::Profile(FoldingSetNodeID &ID) const { ProfileDagInit(ID, Val, ValName, Args, ArgNames); } +Init *DagInit::convertInitializerTo(RecTy *Ty) const { + if (isa<DagRecTy>(Ty)) + return const_cast<DagInit *>(this); + + return nullptr; +} + Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const { std::vector<Init*> NewArgs; for (unsigned i = 0, e = Args.size(); i != e; ++i) @@ -1535,20 +1527,20 @@ std::string DagInit::getAsString() const { // Other implementations //===----------------------------------------------------------------------===// -RecordVal::RecordVal(Init *N, RecTy *T, unsigned P) - : Name(N), Ty(T), Prefix(P) { - Value = Ty->convertValue(UnsetInit::get()); +RecordVal::RecordVal(Init *N, RecTy *T, bool P) + : NameAndPrefix(N, P), Ty(T) { + Value = UnsetInit::get()->convertInitializerTo(Ty); assert(Value && "Cannot create unset value for current type!"); } -RecordVal::RecordVal(const std::string &N, RecTy *T, unsigned P) - : Name(StringInit::get(N)), Ty(T), Prefix(P) { - Value = 
Ty->convertValue(UnsetInit::get()); +RecordVal::RecordVal(const std::string &N, RecTy *T, bool P) + : NameAndPrefix(StringInit::get(N), P), Ty(T) { + Value = UnsetInit::get()->convertInitializerTo(Ty); assert(Value && "Cannot create unset value for current type!"); } const std::string &RecordVal::getName() const { - return cast<StringInit>(Name)->getValue(); + return cast<StringInit>(getNameInit())->getValue(); } void RecordVal::dump() const { errs() << *this; } @@ -1577,8 +1569,7 @@ void Record::init() { void Record::checkName() { // Ensure the record name has string type. const TypedInit *TypedName = cast<const TypedInit>(Name); - RecTy *Type = TypedName->getType(); - if (!isa<StringRecTy>(Type)) + if (!isa<StringRecTy>(TypedName->getType())) PrintFatalError(getLoc(), "Record name is not a string!"); } @@ -1649,9 +1640,11 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) { const std::vector<Init *> &TArgs = R.getTemplateArgs(); if (!TArgs.empty()) { OS << "<"; - for (unsigned i = 0, e = TArgs.size(); i != e; ++i) { - if (i) OS << ", "; - const RecordVal *RV = R.getValue(TArgs[i]); + bool NeedComma = false; + for (const Init *TA : TArgs) { + if (NeedComma) OS << ", "; + NeedComma = true; + const RecordVal *RV = R.getValue(TA); assert(RV && "Template argument record not found??"); RV->print(OS, false); } @@ -1662,18 +1655,17 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) { const std::vector<Record*> &SC = R.getSuperClasses(); if (!SC.empty()) { OS << "\t//"; - for (unsigned i = 0, e = SC.size(); i != e; ++i) - OS << " " << SC[i]->getNameInitAsString(); + for (const Record *Super : SC) + OS << " " << Super->getNameInitAsString(); } OS << "\n"; - const std::vector<RecordVal> &Vals = R.getValues(); - for (unsigned i = 0, e = Vals.size(); i != e; ++i) - if (Vals[i].getPrefix() && !R.isTemplateArg(Vals[i].getName())) - OS << Vals[i]; - for (unsigned i = 0, e = Vals.size(); i != e; ++i) - if (!Vals[i].getPrefix() && !R.isTemplateArg(Vals[i].getName())) - OS << Vals[i]; + for (const RecordVal &Val : R.getValues()) + if (Val.getPrefix() && !R.isTemplateArg(Val.getName())) + OS << Val; + for (const RecordVal &Val : R.getValues()) + if (!Val.getPrefix() && !R.isTemplateArg(Val.getName())) + OS << Val; return OS << "}\n"; } @@ -1746,8 +1738,8 @@ std::vector<Record*> Record::getValueAsListOfDefs(StringRef FieldName) const { ListInit *List = getValueAsListInit(FieldName); std::vector<Record*> Defs; - for (unsigned i = 0; i < List->getSize(); i++) { - if (DefInit *DI = dyn_cast<DefInit>(List->getElement(i))) + for (Init *I : List->getValues()) { + if (DefInit *DI = dyn_cast<DefInit>(I)) Defs.push_back(DI->getDef()); else PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + @@ -1780,8 +1772,8 @@ std::vector<int64_t> Record::getValueAsListOfInts(StringRef FieldName) const { ListInit *List = getValueAsListInit(FieldName); std::vector<int64_t> Ints; - for (unsigned i = 0; i < List->getSize(); i++) { - if (IntInit *II = dyn_cast<IntInit>(List->getElement(i))) + for (Init *I : List->getValues()) { + if (IntInit *II = dyn_cast<IntInit>(I)) Ints.push_back(II->getValue()); else PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + @@ -1798,9 +1790,9 @@ std::vector<std::string> Record::getValueAsListOfStrings(StringRef FieldName) const { ListInit *List = getValueAsListInit(FieldName); std::vector<std::string> Strings; - for (unsigned i = 0; i < List->getSize(); i++) { - if (StringInit *II = dyn_cast<StringInit>(List->getElement(i))) - 
Strings.push_back(II->getValue()); + for (Init *I : List->getValues()) { + if (StringInit *SI = dyn_cast<StringInit>(I)) + Strings.push_back(SI->getValue()); else PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + FieldName + "' does not have a list of strings initializer!"); @@ -1922,23 +1914,23 @@ Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass, RecTy *Type = cast<TypedInit>(Name)->getType(); BinOpInit *NewName = - BinOpInit::get(BinOpInit::STRCONCAT, - BinOpInit::get(BinOpInit::STRCONCAT, - CurRec.getNameInit(), - StringInit::get(Scoper), - Type)->Fold(&CurRec, CurMultiClass), - Name, - Type); + BinOpInit::get(BinOpInit::STRCONCAT, + BinOpInit::get(BinOpInit::STRCONCAT, + CurRec.getNameInit(), + StringInit::get(Scoper), + Type)->Fold(&CurRec, CurMultiClass), + Name, + Type); if (CurMultiClass && Scoper != "::") { NewName = - BinOpInit::get(BinOpInit::STRCONCAT, - BinOpInit::get(BinOpInit::STRCONCAT, - CurMultiClass->Rec.getNameInit(), - StringInit::get("::"), - Type)->Fold(&CurRec, CurMultiClass), - NewName->Fold(&CurRec, CurMultiClass), - Type); + BinOpInit::get(BinOpInit::STRCONCAT, + BinOpInit::get(BinOpInit::STRCONCAT, + CurMultiClass->Rec.getNameInit(), + StringInit::get("::"), + Type)->Fold(&CurRec, CurMultiClass), + NewName->Fold(&CurRec, CurMultiClass), + Type); } return NewName->Fold(&CurRec, CurMultiClass); diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 0e654f982410..15df25aea50e 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -118,7 +118,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName, for (unsigned i = 0, e = BitList.size(); i != e; ++i) { unsigned Bit = BitList[i]; if (NewBits[Bit]) - return Error(Loc, "Cannot set bit #" + utostr(Bit) + " of value '" + + return Error(Loc, "Cannot set bit #" + Twine(Bit) + " of value '" + ValName->getAsUnquotedString() + "' more than once"); NewBits[Bit] = BInit->getBit(i); } @@ -148,9 +148,8 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName, bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) { Record *SC = SubClass.Rec; // Add all of the values in the subclass into the current class. - const std::vector<RecordVal> &Vals = SC->getValues(); - for (unsigned i = 0, e = Vals.size(); i != e; ++i) - if (AddValue(CurRec, SubClass.RefRange.Start, Vals[i])) + for (const RecordVal &Val : SC->getValues()) + if (AddValue(CurRec, SubClass.RefRange.Start, Val)) return true; const std::vector<Init *> &TArgs = SC->getTemplateArgs(); @@ -178,7 +177,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) { } else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) { return Error(SubClass.RefRange.Start, "Value not specified for template argument #" + - utostr(i) + " (" + TArgs[i]->getAsUnquotedString() + + Twine(i) + " (" + TArgs[i]->getAsUnquotedString() + ") of subclass '" + SC->getNameInitAsString() + "'!"); } } @@ -272,7 +271,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, } else if (!CurRec->getValue(SMCTArgs[i])->getValue()->isComplete()) { return Error(SubMultiClass.RefRange.Start, "Value not specified for template argument #" + - utostr(i) + " (" + SMCTArgs[i]->getAsUnquotedString() + + Twine(i) + " (" + SMCTArgs[i]->getAsUnquotedString() + ") of subclass '" + SMC->Rec.getNameInitAsString() + "'!"); } } @@ -309,7 +308,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){ } // Process each value. 
- for (int64_t i = 0; i < List->getSize(); ++i) { + for (unsigned i = 0; i < List->size(); ++i) { Init *ItemVal = List->resolveListElementReference(*CurRec, nullptr, i); IterVals.push_back(IterRecord(CurLoop.IterVar, ItemVal)); if (ProcessForeachDefs(CurRec, Loc, IterVals)) @@ -325,9 +324,9 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){ auto IterRec = make_unique<Record>(*CurRec); // Set the iterator values now. - for (unsigned i = 0, e = IterVals.size(); i != e; ++i) { - VarInit *IterVar = IterVals[i].IterVar; - TypedInit *IVal = dyn_cast<TypedInit>(IterVals[i].IterValue); + for (IterRecord &IR : IterVals) { + VarInit *IterVar = IR.IterVar; + TypedInit *IVal = dyn_cast<TypedInit>(IR.IterValue); if (!IVal) return Error(Loc, "foreach iterator value is untyped"); @@ -1296,7 +1295,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, // All other values must be convertible to just a single bit. Init *Bit = Vals[i]->convertInitializerTo(BitRecTy::get()); if (!Bit) { - Error(BraceLoc, "Element #" + utostr(i) + " (" + Vals[i]->getAsString()+ + Error(BraceLoc, "Element #" + Twine(i) + " (" + Vals[i]->getAsString() + ") is not convertable to a bit"); return nullptr; } @@ -1315,11 +1314,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, if (ItemType) { ListRecTy *ListType = dyn_cast<ListRecTy>(ItemType); if (!ListType) { - std::string s; - raw_string_ostream ss(s); - ss << "Type mismatch for list, expected list type, got " - << ItemType->getAsString(); - TokError(ss.str()); + TokError(Twine("Type mismatch for list, expected list type, got ") + + ItemType->getAsString()); return nullptr; } GivenListTy = ListType; @@ -1604,7 +1600,7 @@ TGParser::ParseDagArgList(Record *CurRec) { // DagArg ::= VARNAME if (Lex.getCode() == tgtok::VarName) { // A missing value is treated like '?'. - Result.push_back(std::make_pair(UnsetInit::get(), Lex.getCurStrVal())); + Result.emplace_back(UnsetInit::get(), Lex.getCurStrVal()); Lex.Lex(); } else { // DagArg ::= Value (':' VARNAME)? @@ -1810,8 +1806,8 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) { assert(!IterType && "Type already initialized?"); IterType = IntRecTy::get(); std::vector<Init*> Values; - for (unsigned i = 0, e = Ranges.size(); i != e; ++i) - Values.push_back(IntInit::get(Ranges[i])); + for (unsigned R : Ranges) + Values.push_back(IntInit::get(R)); ForeachListValue = ListInit::get(Values, IterType); } @@ -1937,10 +1933,9 @@ bool TGParser::ParseBody(Record *CurRec) { /// \brief Apply the current let bindings to \a CurRec. /// \returns true on error, false otherwise. bool TGParser::ApplyLetStack(Record *CurRec) { - for (unsigned i = 0, e = LetStack.size(); i != e; ++i) - for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j) - if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name, - LetStack[i][j].Bits, LetStack[i][j].Value)) + for (std::vector<LetRecord> &LetInfo : LetStack) + for (LetRecord &LR : LetInfo) + if (SetValue(CurRec, LR.Loc, LR.Name, LR.Bits, LR.Value)) return true; return false; } @@ -2177,7 +2172,7 @@ std::vector<LetRecord> TGParser::ParseLetList() { if (!Val) return std::vector<LetRecord>(); // Now that we have everything, add the record. 
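A recurring change in the TGParser.cpp hunks is composing diagnostics with Twine rather than utostr() or a raw_string_ostream: the concatenation stays on the stack and is rendered once, inside Error()/TokError(), which take a const Twine&. The same pattern standalone, with an illustrative message:

  #include "llvm/ADT/Twine.h"
  std::string Msg =
      ("Value not specified for template argument #" + llvm::Twine(3)).str();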
- Result.push_back(LetRecord(Name, Bits, Val, NameLoc)); + Result.emplace_back(std::move(Name), std::move(Bits), Val, NameLoc); if (Lex.getCode() != tgtok::comma) return Result; @@ -2468,7 +2463,7 @@ bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, } else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) { return Error(SubClassLoc, "value not specified for template argument #" + - utostr(i) + " (" + TArgs[i]->getAsUnquotedString() + + Twine(i) + " (" + TArgs[i]->getAsUnquotedString() + ") of multiclass '" + MC.Rec.getNameInitAsString() + "'"); } diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index a0a09e4a833b..da22d8d9e4c5 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -206,7 +206,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, // FIXME: Can we get anything other than a plain symbol here? assert(!MO.getTargetFlags() && "Unknown operand target flag!"); - O << *Sym; + Sym->print(O, MAI); printOffset(MO.getOffset(), O); break; } diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 78a2021f79a3..1ea4abcf05fa 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -156,6 +156,9 @@ public: SDNode *SelectLIBM(SDNode *N); + SDNode *SelectReadRegister(SDNode *N); + SDNode *SelectWriteRegister(SDNode *N); + // Include the pieces autogenerated from the target description. #include "AArch64GenDAGISel.inc" @@ -2114,6 +2117,120 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, return true; } +// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the +// integer value of each field, and combines them into a single value to be +// used in the MRS/MSR instruction. +static int getIntOperandFromRegisterString(StringRef RegString) { + SmallVector<StringRef, 5> Fields; + RegString.split(Fields, ":"); + + if (Fields.size() == 1) + return -1; + + assert(Fields.size() == 5 + && "Invalid number of fields in read register string"); + + SmallVector<int, 5> Ops; + bool AllIntFields = true; + + for (StringRef Field : Fields) { + unsigned IntField; + AllIntFields &= !Field.getAsInteger(10, IntField); + Ops.push_back(IntField); + } + + assert(AllIntFields && + "Unexpected non-integer value in special register string."); + + // Need to combine the integer fields of the string into a single value + // based on the bit encoding of the MRS/MSR instruction. + return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | + (Ops[3] << 3) | (Ops[4]); +} + +// Lower the read_register intrinsic to an MRS instruction node if the special +// register string argument is either of the form detailed in the ACLE (the +// form described in getIntOperandFromRegisterString) or is a named register +// known by the MRS SysReg mapper.
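[Editor's note — not part of the imported diff.] As a worked example of the packing performed by getIntOperandFromRegisterString above: the ACLE-style string "3:3:13:0:2" names TPIDR_EL0, and combining its fields with the shifts shown (2+3+4+4+3 bits for o0:op1:CRn:CRm:op2) yields 0xde82, the documented system-register encoding. A minimal standalone C++ sketch; the helper name packSysReg is hypothetical:

#include <cassert>
#include <cstdint>

// Mirrors the shift/or combination used by getIntOperandFromRegisterString.
static uint32_t packSysReg(uint32_t O0, uint32_t Op1, uint32_t CRn,
                           uint32_t CRm, uint32_t Op2) {
  return (O0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
}

int main() {
  // "3:3:13:0:2" is TPIDR_EL0; 0xde82 is its architectural encoding.
  assert(packSysReg(3, 3, 13, 0, 2) == 0xde82);
  return 0;
}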
+SDNode *AArch64DAGToDAGISel::SelectReadRegister(SDNode *N) { + const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); + const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + SDLoc DL(N); + + int Reg = getIntOperandFromRegisterString(RegString->getString()); + if (Reg != -1) + return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0), + MVT::Other, + CurDAG->getTargetConstant(Reg, DL, MVT::i32), + N->getOperand(0)); + + // Use the sysreg mapper to map the remaining possible strings to the + // value for the register to be used for the instruction operand. + AArch64SysReg::MRSMapper mapper; + bool IsValidSpecialReg; + Reg = mapper.fromString(RegString->getString(), + Subtarget->getFeatureBits(), + IsValidSpecialReg); + if (IsValidSpecialReg) + return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0), + MVT::Other, + CurDAG->getTargetConstant(Reg, DL, MVT::i32), + N->getOperand(0)); + + return nullptr; +} + +// Lower the write_register intrinsic to an MSR instruction node if the special +// register string argument is either of the form detailed in the ACLE (the +// form described in getIntOperandFromRegisterString) or is a named register +// known by the MSR SysReg mapper. +SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) { + const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); + const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + SDLoc DL(N); + + int Reg = getIntOperandFromRegisterString(RegString->getString()); + if (Reg != -1) + return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, + CurDAG->getTargetConstant(Reg, DL, MVT::i32), + N->getOperand(2), N->getOperand(0)); + + // Check if the register was one of those allowed as the pstatefield value in + // the MSR (immediate) instruction. To accept the values allowed in the + // pstatefield for the MSR (immediate) instruction, we also require that an + // immediate value has been provided as an argument; we know this is the + // case, as it has been ensured by semantic checking. + AArch64PState::PStateMapper PMapper; + bool IsValidSpecialReg; + Reg = PMapper.fromString(RegString->getString(), + Subtarget->getFeatureBits(), + IsValidSpecialReg); + if (IsValidSpecialReg) { + assert (isa<ConstantSDNode>(N->getOperand(2)) + && "Expected a constant integer expression."); + uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + return CurDAG->getMachineNode(AArch64::MSRpstate, DL, MVT::Other, + CurDAG->getTargetConstant(Reg, DL, MVT::i32), + CurDAG->getTargetConstant(Immed, DL, MVT::i16), + N->getOperand(0)); + } + + // Use the sysreg mapper to attempt to map the remaining possible strings + // to the value for the register to be used for the MSR (register) + // instruction operand.
+ AArch64SysReg::MSRMapper Mapper; + Reg = Mapper.fromString(RegString->getString(), + Subtarget->getFeatureBits(), + IsValidSpecialReg); + + if (IsValidSpecialReg) + return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, + CurDAG->getTargetConstant(Reg, DL, MVT::i32), + N->getOperand(2), N->getOperand(0)); + + return nullptr; +} + SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(errs() << "Selecting: "); @@ -2135,6 +2252,16 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { default: break; + case ISD::READ_REGISTER: + if (SDNode *Res = SelectReadRegister(Node)) + return Res; + break; + + case ISD::WRITE_REGISTER: + if (SDNode *Res = SelectWriteRegister(Node)) + return Res; + break; + case ISD::ADD: if (SDNode *I = SelectMLAV64LaneV128(Node)) return I; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index e6108c3e95e2..1616ff13535d 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -76,6 +76,9 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration( cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); +/// Value type used for condition codes. +static const MVT MVT_CC = MVT::i32; + AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -807,6 +810,9 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::ADCS: return "AArch64ISD::ADCS"; case AArch64ISD::SBCS: return "AArch64ISD::SBCS"; case AArch64ISD::ANDS: return "AArch64ISD::ANDS"; + case AArch64ISD::CCMP: return "AArch64ISD::CCMP"; + case AArch64ISD::CCMN: return "AArch64ISD::CCMN"; + case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP"; case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; case AArch64ISD::FMIN: return "AArch64ISD::FMIN"; case AArch64ISD::FMAX: return "AArch64ISD::FMAX"; @@ -1165,10 +1171,133 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, LHS = LHS.getOperand(0); } - return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS) + return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS) .getValue(1); } +static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, + ISD::CondCode CC, SDValue CCOp, + SDValue Condition, unsigned NZCV, + SDLoc DL, SelectionDAG &DAG) { + unsigned Opcode = 0; + if (LHS.getValueType().isFloatingPoint()) + Opcode = AArch64ISD::FCCMP; + else if (RHS.getOpcode() == ISD::SUB) { + SDValue SubOp0 = RHS.getOperand(0); + if (const ConstantSDNode *SubOp0C = dyn_cast<ConstantSDNode>(SubOp0)) + if (SubOp0C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + // See emitComparison() on why we can only do this for SETEQ and SETNE. + Opcode = AArch64ISD::CCMN; + RHS = RHS.getOperand(1); + } + } + if (Opcode == 0) + Opcode = AArch64ISD::CCMP; + + SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); + return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); +} + +/// Returns true if @p Val is a tree of AND/OR/SETCC operations. +static bool isConjunctionDisjunctionTree(const SDValue Val, unsigned Depth) { + if (!Val.hasOneUse()) + return false; + if (Val->getOpcode() == ISD::SETCC) + return true; + // Protect against stack overflow. 
+ if (Depth > 1000) + return false; + if (Val->getOpcode() == ISD::AND || Val->getOpcode() == ISD::OR) { + SDValue O0 = Val->getOperand(0); + SDValue O1 = Val->getOperand(1); + return isConjunctionDisjunctionTree(O0, Depth+1) && + isConjunctionDisjunctionTree(O1, Depth+1); + } + return false; +} + +/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain +/// of CCMP/CFCMP ops. For example (SETCC_0 & SETCC_1) with condition cond_0 and +/// cond_1 can be transformed into "CMP; CCMP" with CCMP executing on cond_0 +/// and setting flags to the inverse of cond_1 otherwise. +/// This recursive function produces DAG nodes that produce condition flags +/// suitable to determine the truth value of @p Val (which is AND/OR/SETCC) +/// by testing the result for the condition set to @p OutCC. If @p Negate is +/// set, the opposite truth value is produced. If @p CCOp and @p Condition are +/// given then conditional comparisons are created so that false is reported +/// when they are false. +static SDValue emitConjunctionDisjunctionTree( + SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, + SDValue CCOp = SDValue(), AArch64CC::CondCode Condition = AArch64CC::AL) { + assert(isConjunctionDisjunctionTree(Val, 0)); + // We're at a tree leaf, produce a c?f?cmp. + unsigned Opcode = Val->getOpcode(); + if (Opcode == ISD::SETCC) { + SDValue LHS = Val->getOperand(0); + SDValue RHS = Val->getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get(); + bool isInteger = LHS.getValueType().isInteger(); + if (Negate) + CC = getSetCCInverse(CC, isInteger); + SDLoc DL(Val); + // Determine OutCC and handle FP special case. + if (isInteger) { + OutCC = changeIntCCToAArch64CC(CC); + } else { + assert(LHS.getValueType().isFloatingPoint()); + AArch64CC::CondCode ExtraCC; + changeFPCCToAArch64CC(CC, OutCC, ExtraCC); + // Surprisingly, some floating point conditions can't be tested with a + // single condition code. Construct an additional comparison in this case. + // See comment below on how we deal with OR conditions. + if (ExtraCC != AArch64CC::AL) { + SDValue ExtraCmp; + if (!CCOp.getNode()) + ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG); + else { + SDValue ConditionOp = DAG.getConstant(Condition, DL, MVT_CC); + // Note that we want the inverse of ExtraCC, so NZCV is not inverted. + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC); + ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, + NZCV, DL, DAG); + } + CCOp = ExtraCmp; + Condition = AArch64CC::getInvertedCondCode(ExtraCC); + OutCC = AArch64CC::getInvertedCondCode(OutCC); + } + } + + // Produce a normal comparison if we are first in the chain. + if (!CCOp.getNode()) + return emitComparison(LHS, RHS, CC, DL, DAG); + // Otherwise produce a ccmp. + SDValue ConditionOp = DAG.getConstant(Condition, DL, MVT_CC); + AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); + return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL, + DAG); + } + + // Construct comparison sequence for the left hand side. + SDValue LHS = Val->getOperand(0); + SDValue RHS = Val->getOperand(1); + + // We can only implement AND-like behaviour here, but negation is free. So we + // use (not (and (not x) (not y))) to implement (or x y).
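[Editor's note — not part of the imported diff.] The comment above leans on De Morgan's law: a CCMP chain natively computes only a conjunction of conditions, so an OR is obtained by negating the two leaves and the final result. A minimal truth-table check of that identity in C++:

#include <cassert>

// not(and(not x, not y)) is exactly or(x, y); this is what lets the
// AND-only CCMP chain express disjunctions.
static bool lowerOrViaAnd(bool X, bool Y) { return !(!X && !Y); }

int main() {
  for (int X = 0; X < 2; ++X)
    for (int Y = 0; Y < 2; ++Y)
      assert(lowerOrViaAnd(X, Y) == (X || Y));
  return 0;
}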
+ bool isOr = Val->getOpcode() == ISD::OR; + assert((isOr || Val->getOpcode() == ISD::AND) && "Should have AND or OR."); + Negate ^= isOr; + + AArch64CC::CondCode RHSCC; + SDValue CmpR = + emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, isOr, CCOp, Condition); + SDValue CmpL = + emitConjunctionDisjunctionTree(DAG, LHS, OutCC, isOr, CmpR, RHSCC); + if (Negate) + OutCC = AArch64CC::getInvertedCondCode(OutCC); + return CmpL; +} + static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) { SDValue Cmp; @@ -1227,47 +1356,55 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } } } - // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. - // For the i8 operand, the largest immediate is 255, so this can be easily - // encoded in the compare instruction. For the i16 operand, however, the - // largest immediate cannot be encoded in the compare. - // Therefore, use a sign extending load and cmn to avoid materializing the -1 - // constant. For example, - // movz w1, #65535 - // ldrh w0, [x0, #0] - // cmp w0, w1 - // > - // ldrsh w0, [x0, #0] - // cmn w0, #1 - // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS) - // if and only if (sext LHS) == (sext RHS). The checks are in place to ensure - // both the LHS and RHS are truly zero extended and to make sure the - // transformation is profitable. if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) { - if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) && - isa<LoadSDNode>(LHS)) { - if (cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD && - cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 && - LHS.getNode()->hasNUsesOfValue(1, 0)) { - int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue(); - if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { - SDValue SExt = - DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, - DAG.getValueType(MVT::i16)); - Cmp = emitComparison(SExt, - DAG.getConstant(ValueofRHS, dl, - RHS.getValueType()), - CC, dl, DAG); - AArch64CC = changeIntCCToAArch64CC(CC); - AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32); - return Cmp; - } + const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS); + + // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. + // For the i8 operand, the largest immediate is 255, so this can be easily + // encoded in the compare instruction. For the i16 operand, however, the + // largest immediate cannot be encoded in the compare. + // Therefore, use a sign extending load and cmn to avoid materializing the + // -1 constant. For example, + // movz w1, #65535 + // ldrh w0, [x0, #0] + // cmp w0, w1 + // > + // ldrsh w0, [x0, #0] + // cmn w0, #1 + // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS) + // if and only if (sext LHS) == (sext RHS). The checks are in place to + // ensure both the LHS and RHS are truly zero extended and to make sure the + // transformation is profitable.
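[Editor's note — not part of the imported diff.] The property the comment above relies on can be checked exhaustively for i16: a zero-extended 16-bit value equals 0xffff exactly when its sign-extended counterpart equals -1, which is what justifies replacing movz/ldrh/cmp with ldrsh/cmn. A standalone C++ check (two's-complement narrowing assumed):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t V = 0; V <= 0xffff; ++V) {
    uint16_t A = static_cast<uint16_t>(V);
    bool ViaZext = static_cast<uint32_t>(A) == 0xffffu;                  // movz + ldrh + cmp
    bool ViaSext = static_cast<int32_t>(static_cast<int16_t>(A)) == -1;  // ldrsh + cmn w0, #1
    assert(ViaZext == ViaSext);
  }
  return 0;
}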
+ if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) && + cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD && + cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 && + LHS.getNode()->hasNUsesOfValue(1, 0)) { + int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue(); + if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { + SDValue SExt = + DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, + DAG.getValueType(MVT::i16)); + Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl, + RHS.getValueType()), + CC, dl, DAG); + AArch64CC = changeIntCCToAArch64CC(CC); + goto CreateCCNode; } } + + if ((RHSC->isNullValue() || RHSC->isOne()) && + isConjunctionDisjunctionTree(LHS, 0)) { + bool Negate = (CC == ISD::SETNE) ^ RHSC->isNullValue(); + Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC, Negate); + goto CreateCCNode; + } } + Cmp = emitComparison(LHS, RHS, CC, dl, DAG); AArch64CC = changeIntCCToAArch64CC(CC); - AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32); + +CreateCCNode: + AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC); return Cmp; } @@ -4065,7 +4202,8 @@ unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, .Default(0); if (Reg) return Reg; - report_fatal_error("Invalid register name global variable"); + report_fatal_error(Twine("Invalid register name \"" + + StringRef(RegName) + "\".")); } SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, @@ -6741,7 +6879,8 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // AArch64 has five basic addressing modes: // reg // reg + 9-bit signed offset @@ -6792,7 +6931,8 @@ bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, } int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // Scaling factors are not free at all. // Operands | Rt Latency // ------------------------------------------- @@ -6800,7 +6940,7 @@ int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, // ------------------------------------------- // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5 // Rt, [Xn, Wm, <extend> #imm] | - if (isLegalAddressingMode(AM, Ty)) + if (isLegalAddressingMode(AM, Ty, AS)) // Scale represents reg2 * scale, thus account for 1 if // it is not equal to 0 or 1. return AM.Scale != 0 && AM.Scale != 1; @@ -9120,3 +9260,8 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { return Ty->isArrayTy(); } + +bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, + EVT) const { + return false; +} diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 0d9b8b7c875e..db192c78169a 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -58,6 +58,11 @@ enum NodeType : unsigned { SBCS, ANDS, + // Conditional compares. Operands: left,right,falsecc,cc,flags + CCMP, + CCMN, + FCCMP, + // Floating point comparison FCMP, @@ -314,14 +319,16 @@ public: /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. 
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; /// \brief Return the cost of the scaling factor used in the addressing /// mode represented by AM for this target, for a load/store /// of the specified type. /// If the AM is supported, the return value must be >= 0. /// If the AM is not supported, it returns a negative value. - int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override; + int getScalingFactorCost(const AddrMode &AM, Type *Ty, + unsigned AS) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be @@ -506,6 +513,8 @@ private: bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + + bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override; }; namespace AArch64 { diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 3b8b6681a084..1fe9c7f8cc5a 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -525,6 +525,13 @@ def imm0_31 : Operand<i64>, ImmLeaf<i64, [{ let ParserMatchClass = Imm0_31Operand; } +// True if the 32-bit immediate is in the range [0,31] +def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint64_t)Imm) < 32; +}]> { + let ParserMatchClass = Imm0_31Operand; +} + // imm0_15 predicate - True if the immediate is in the range [0,15] def imm0_15 : Operand<i64>, ImmLeaf<i64, [{ return ((uint64_t)Imm) < 16; @@ -542,7 +549,9 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{ // imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15] def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{ return ((uint32_t)Imm) < 16; -}]>; +}]> { + let ParserMatchClass = Imm0_15Operand; +} // An arithmetic shifter operand: // {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr @@ -2068,9 +2077,12 @@ multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic, //--- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm> - : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $imm, $nzcv, $cond", "", []>, +class BaseCondComparisonImm<bit op, RegisterClass regtype, ImmLeaf immtype, + string mnemonic, SDNode OpNode> + : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $imm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2090,19 +2102,13 @@ class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm> let Inst{3-0} = nzcv; } -multiclass CondSetFlagsImm<bit op, string asm> { - def Wi : BaseCondSetFlagsImm<op, GPR32, asm> { - let Inst{31} = 0; - } - def Xi : BaseCondSetFlagsImm<op, GPR64, asm> { - let Inst{31} = 1; - } -} - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm> - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, +class BaseCondComparisonReg<bit op, RegisterClass regtype, string mnemonic, + SDNode OpNode> + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", + [(set NZCV, (OpNode 
regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2122,11 +2128,19 @@ class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm> let Inst{3-0} = nzcv; } -multiclass CondSetFlagsReg<bit op, string asm> { - def Wr : BaseCondSetFlagsReg<op, GPR32, asm> { +multiclass CondComparison<bit op, string mnemonic, SDNode OpNode> { + // immediate operand variants + def Wi : BaseCondComparisonImm<op, GPR32, imm32_0_31, mnemonic, OpNode> { + let Inst{31} = 0; + } + def Xi : BaseCondComparisonImm<op, GPR64, imm0_31, mnemonic, OpNode> { + let Inst{31} = 1; + } + // register operand variants + def Wr : BaseCondComparisonReg<op, GPR32, mnemonic, OpNode> { let Inst{31} = 0; } - def Xr : BaseCondSetFlagsReg<op, GPR64, asm> { + def Xr : BaseCondComparisonReg<op, GPR64, mnemonic, OpNode> { let Inst{31} = 1; } } @@ -3934,11 +3948,14 @@ multiclass FPComparison<bit signalAllNans, string asm, //--- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseFPCondComparison<bit signalAllNans, - RegisterClass regtype, string asm> - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, +class BaseFPCondComparison<bit signalAllNans, RegisterClass regtype, + string mnemonic, list<dag> pat> + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>, Sched<[WriteFCmp]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + bits<5> Rn; bits<5> Rm; bits<4> nzcv; @@ -3954,16 +3971,18 @@ class BaseFPCondComparison<bit signalAllNans, let Inst{3-0} = nzcv; } -multiclass FPCondComparison<bit signalAllNans, string asm> { - let Defs = [NZCV], Uses = [NZCV] in { - def Srr : BaseFPCondComparison<signalAllNans, FPR32, asm> { +multiclass FPCondComparison<bit signalAllNans, string mnemonic, + SDPatternOperator OpNode = null_frag> { + def Srr : BaseFPCondComparison<signalAllNans, FPR32, mnemonic, + [(set NZCV, (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm), (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]> { let Inst{22} = 0; } - - def Drr : BaseFPCondComparison<signalAllNans, FPR64, asm> { + def Drr : BaseFPCondComparison<signalAllNans, FPR64, mnemonic, + [(set NZCV, (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm), (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]> { let Inst{22} = 1; } - } // Defs = [NZCV], Uses = [NZCV] } //--- @@ -8822,6 +8841,178 @@ class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode> [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>; } // end of 'let Predicates = [HasCrypto]' +//---------------------------------------------------------------------------- +// v8.1 atomic instructions extension: +// * CAS +// * CASP +// * SWP +// * LDOPregister<OP>, and aliases STOPregister<OP> + +// Instruction encodings: +// +// 31 30|29 24|23|22|21|20 16|15|14 10|9 5|4 0 +// CAS SZ |001000|1 |A |1 |Rs |R |11111 |Rn |Rt +// CASP 0|SZ|001000|0 |A |1 |Rs |R |11111 |Rn |Rt +// SWP SZ |111000|A |R |1 |Rs |1 |OPC|00|Rn |Rt +// LD SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |Rt +// ST SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |11111 + +// Instruction syntax: +// +// CAS{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>] +// CAS{<order>} <Xs>, <Xt>, [<Xn|SP>] +// CASP{<order>} <Ws>, <W(s+1)>, <Wt>, <W(t+1)>, [<Xn|SP>] +// CASP{<order>} <Xs>, <X(s+1)>, <Xt>, <X(t+1)>, [<Xn|SP>] +// SWP{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>] +// SWP{<order>} <Xs>, <Xt>, [<Xn|SP>] +// LD<OP>{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>] +// LD<OP>{<order>} <Xs>, <Xt>, [<Xn|SP>] 
+// ST<OP>{<order>}[<size>] <Ws>, [<Xn|SP>] +// ST<OP>{<order>} <Xs>, [<Xn|SP>] + +let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseCASEncoding<dag oops, dag iops, string asm, string operands, + string cstr, list<dag> pattern> + : I<oops, iops, asm, operands, cstr, pattern> { + bits<2> Sz; + bit NP; + bit Acq; + bit Rel; + bits<5> Rs; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b001000; + let Inst{23} = NP; + let Inst{22} = Acq; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = Rel; + let Inst{14-10} = 0b11111; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +class BaseCAS<string order, string size, RegisterClass RC> + : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "cas" # order # size, "\t$Rs, $Rt, [$Rn]", + "$out = $Rs",[]> { + let NP = 1; +} + +multiclass CompareAndSwap<bits<1> Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in def b : BaseCAS<order, "b", GPR32>; + let Sz = 0b01, Acq = Acq, Rel = Rel in def h : BaseCAS<order, "h", GPR32>; + let Sz = 0b10, Acq = Acq, Rel = Rel in def s : BaseCAS<order, "", GPR32>; + let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseCAS<order, "", GPR64>; +} + +class BaseCASP<string order, string size, RegisterOperand RC> + : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "casp" # order # size, "\t$Rs, $Rt, [$Rn]", + "$out = $Rs",[]> { + let NP = 0; +} + +multiclass CompareAndSwapPair<bits<1> Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in + def s : BaseCASP<order, "", WSeqPairClassOperand>; + let Sz = 0b01, Acq = Acq, Rel = Rel in + def d : BaseCASP<order, "", XSeqPairClassOperand>; +} + +let Predicates = [HasV8_1a] in +class BaseSWP<string order, string size, RegisterClass RC> + : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size, + "\t$Rs, $Rt, [$Rn]","",[]> { + bits<2> Sz; + bit Acq; + bit Rel; + bits<5> Rs; + bits<3> opc = 0b000; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = Acq; + let Inst{22} = Rel; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b1; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Swap<bits<1> Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in def b : BaseSWP<order, "b", GPR32>; + let Sz = 0b01, Acq = Acq, Rel = Rel in def h : BaseSWP<order, "h", GPR32>; + let Sz = 0b10, Acq = Acq, Rel = Rel in def s : BaseSWP<order, "", GPR32>; + let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseSWP<order, "", GPR64>; +} + +let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseLDOPregister<string op, string order, string size, RegisterClass RC> + : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size, + "\t$Rs, $Rt, [$Rn]","",[]> { + bits<2> Sz; + bit Acq; + bit Rel; + bits<5> Rs; + bits<3> opc; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = Acq; + let Inst{22} = Rel; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b0; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass LDOPregister<bits<3> opc, string op, bits<1> Acq, bits<1> Rel, + string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in + def b : BaseLDOPregister<op, order, "b", GPR32>; + let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in + def h : 
BaseLDOPregister<op, order, "h", GPR32>; + let Sz = 0b10, Acq = Acq, Rel = Rel, opc = opc in + def s : BaseLDOPregister<op, order, "", GPR32>; + let Sz = 0b11, Acq = Acq, Rel = Rel, opc = opc in + def d : BaseLDOPregister<op, order, "", GPR64>; +} + +let Predicates = [HasV8_1a] in +class BaseSTOPregister<string asm, RegisterClass OP, Register Reg, + Instruction inst> : + InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn)>; + +multiclass STOPregister<string asm, string instr> { + def : BaseSTOPregister<asm # "lb", GPR32, WZR, + !cast<Instruction>(instr # "Lb")>; + def : BaseSTOPregister<asm # "lh", GPR32, WZR, + !cast<Instruction>(instr # "Lh")>; + def : BaseSTOPregister<asm # "l", GPR32, WZR, + !cast<Instruction>(instr # "Ls")>; + def : BaseSTOPregister<asm # "l", GPR64, XZR, + !cast<Instruction>(instr # "Ld")>; + def : BaseSTOPregister<asm # "b", GPR32, WZR, + !cast<Instruction>(instr # "b")>; + def : BaseSTOPregister<asm # "h", GPR32, WZR, + !cast<Instruction>(instr # "h")>; + def : BaseSTOPregister<asm, GPR32, WZR, + !cast<Instruction>(instr # "s")>; + def : BaseSTOPregister<asm, GPR64, XZR, + !cast<Instruction>(instr # "d")>; +} + +//---------------------------------------------------------------------------- // Allow the size specifier tokens to be upper case, not just lower. def : TokenAlias<".8B", ".8b">; def : TokenAlias<".4H", ".4h">; diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 207c34ca7f0b..6941a6bf1b47 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2066,10 +2066,9 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, .setMIFlag(Flag); } -MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - int FrameIndex) const { +MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const { // This is a bit of a hack. 
Consider this instruction: // // %vreg0<def> = COPY %SP; GPR64all:%vreg0 diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index fa4b8b7e6179..d296768ab9b0 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -131,6 +131,7 @@ public: using TargetInstrInfo::foldMemoryOperandImpl; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const override; bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index c7d6a69b9fd7..2f1b8933bf61 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -66,6 +66,20 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4, SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisVT<4, i32>]>; +def SDT_AArch64CCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisInt<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; +def SDT_AArch64FCCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisFP<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; def SDT_AArch64FCmp : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; @@ -160,6 +174,10 @@ def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; +def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; +def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; +def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; + def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; @@ -727,6 +745,74 @@ def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; +// v8.1 atomic CAS +defm CAS : CompareAndSwap<0, 0, "">; +defm CASA : CompareAndSwap<1, 0, "a">; +defm CASL : CompareAndSwap<0, 1, "l">; +defm CASAL : CompareAndSwap<1, 1, "al">; + +// v8.1 atomic CASP +defm CASP : CompareAndSwapPair<0, 0, "">; +defm CASPA : CompareAndSwapPair<1, 0, "a">; +defm CASPL : CompareAndSwapPair<0, 1, "l">; +defm CASPAL : CompareAndSwapPair<1, 1, "al">; + +// v8.1 atomic SWP +defm SWP : Swap<0, 0, "">; +defm SWPA : Swap<1, 0, "a">; +defm SWPL : Swap<0, 1, "l">; +defm SWPAL : Swap<1, 1, "al">; + +// v8.1 atomic LD<OP>(register). 
Performs load and then ST<OP>(register) +defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; +defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; +defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; +defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; + +defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; +defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; +defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; +defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; + +defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; +defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; +defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; +defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; + +defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; +defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; +defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; +defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; + +defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; +defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; +defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; +defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; + +defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; +defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; +defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; +defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; + +defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; +defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; +defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; +defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; + +defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; +defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; +defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; +defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; + +// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR" +defm : STOPregister<"stadd","LDADD">; // STADDx +defm : STOPregister<"stclr","LDCLR">; // STCLRx +defm : STOPregister<"steor","LDEOR">; // STEORx +defm : STOPregister<"stset","LDSET">; // STSETx +defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx +defm : STOPregister<"stsmin","LDSMIN">;// STSMINx +defm : STOPregister<"stumax","LDUMAX">;// STUMAXx +defm : STOPregister<"stumin","LDUMIN">;// STUMINx //===----------------------------------------------------------------------===// // Logical instructions. @@ -950,13 +1036,10 @@ def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; //===----------------------------------------------------------------------===// -// Conditionally set flags instructions. +// Conditional comparison instructions. //===----------------------------------------------------------------------===// -defm CCMN : CondSetFlagsImm<0, "ccmn">; -defm CCMP : CondSetFlagsImm<1, "ccmp">; - -defm CCMN : CondSetFlagsReg<0, "ccmn">; -defm CCMP : CondSetFlagsReg<1, "ccmp">; +defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; +defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; //===----------------------------------------------------------------------===// // Conditional select instructions. 
@@ -2486,7 +2569,7 @@ defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>; //===----------------------------------------------------------------------===// defm FCCMPE : FPCondComparison<1, "fccmpe">; -defm FCCMP : FPCondComparison<0, "fccmp">; +defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>; //===----------------------------------------------------------------------===// // Floating point conditional select instruction. diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index 72edbf14c0d8..e55ae991b635 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -69,10 +69,10 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, AArch64II::MO_PAGEOFF) RefKind = MCSymbolRefExpr::VK_PAGEOFF; } - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd( - Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); + Expr = MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); return MCOperand::createExpr(Expr); } @@ -139,14 +139,14 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, RefFlags |= AArch64MCExpr::VK_NC; const MCExpr *Expr = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd( - Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); + Expr = MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); AArch64MCExpr::VariantKind RefKind; RefKind = static_cast<AArch64MCExpr::VariantKind>(RefFlags); - Expr = AArch64MCExpr::Create(Expr, RefKind, Ctx); + Expr = AArch64MCExpr::create(Expr, RefKind, Ctx); return MCOperand::createExpr(Expr); } @@ -179,7 +179,7 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::createExpr( - MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx)); + MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx)); break; case MachineOperand::MO_GlobalAddress: MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index d5ff3f1f3373..b2efca023372 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -26,8 +26,12 @@ let Namespace = "AArch64" in { def hsub : SubRegIndex<16>; def ssub : SubRegIndex<32>; def dsub : SubRegIndex<32>; + def sube32 : SubRegIndex<32>; + def subo32 : SubRegIndex<32>; def qhisub : SubRegIndex<64>; def qsub : SubRegIndex<64>; + def sube64 : SubRegIndex<64>; + def subo64 : SubRegIndex<64>; // Note: Code depends on these having consecutive numbers def dsub0 : SubRegIndex<64>; def dsub1 : SubRegIndex<64>; @@ -592,3 +596,40 @@ def FPR16Op : RegisterOperand<FPR16, "printOperand">; def FPR32Op : RegisterOperand<FPR32, "printOperand">; def FPR64Op : RegisterOperand<FPR64, "printOperand">; def FPR128Op : RegisterOperand<FPR128, "printOperand">; + + +//===----------------------------------------------------------------------===// +// ARMv8.1a atomic CASP register operands + + +def WSeqPairs : RegisterTuples<[sube32, subo32], + [(rotl GPR32, 0), (rotl GPR32, 1)]>; +def XSeqPairs : RegisterTuples<[sube64, subo64], + [(rotl GPR64, 0), 
(rotl GPR64, 1)]>; + +def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32, + (add WSeqPairs)>{ + let Size = 64; +} +def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64, + (add XSeqPairs)>{ + let Size = 128; +} + + +let RenderMethod = "addRegOperands", ParserMethod="tryParseGPRSeqPair" in { + def WSeqPairsAsmOperandClass : AsmOperandClass { let Name = "WSeqPair"; } + def XSeqPairsAsmOperandClass : AsmOperandClass { let Name = "XSeqPair"; } +} + +def WSeqPairClassOperand : + RegisterOperand<WSeqPairsClass, "printGPRSeqPairsClassOperand<32>"> { + let ParserMatchClass = WSeqPairsAsmOperandClass; +} +def XSeqPairClassOperand : + RegisterOperand<XSeqPairsClass, "printGPRSeqPairsClassOperand<64>"> { + let ParserMatchClass = XSeqPairsAsmOperandClass; +} + + +//===----- END: v8.1a atomic CASP register operands -----------------------===// diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index a9059ab37f5f..f23dd33d0146 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -250,10 +250,14 @@ bool AArch64PassConfig::addPreISel() { // FIXME: On AArch64, this depends on the type. // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). // and the offset has to be a multiple of the related size in bytes. - if ((TM->getOptLevel() == CodeGenOpt::Aggressive && + if ((TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge == cl::BOU_UNSET) || - EnableGlobalMerge == cl::BOU_TRUE) - addPass(createGlobalMergePass(TM, 4095)); + EnableGlobalMerge == cl::BOU_TRUE) { + bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) && + (EnableGlobalMerge == cl::BOU_UNSET); + addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize)); + } + if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64AddressTypePromotionPass()); diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 299b4a55dd82..18ee4a9c72b5 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -40,11 +40,11 @@ const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference( if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { const MCSymbol *Sym = TM.getSymbol(GV, Mang); const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); MCSymbol *PCSym = getContext().createTempSymbol(); Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); - return MCBinaryExpr::CreateSub(Res, PC, getContext()); + const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext()); + return MCBinaryExpr::createSub(Res, PC, getContext()); } return TargetLoweringObjectFileMachO::getTTypeGlobalReference( @@ -65,9 +65,9 @@ const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel( // On ARM64 Darwin, we can reference symbols with foo@GOT-., which // is an indirect pc-relative reference. 
const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); MCSymbol *PCSym = getContext().createTempSymbol(); Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); - return MCBinaryExpr::CreateSub(Res, PC, getContext()); + const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext()); + return MCBinaryExpr::createSub(Res, PC, getContext()); } diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 38d34e65a2e4..063c053ffe8a 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -107,6 +107,7 @@ private: OperandMatchResultTy tryParseAddSubImm(OperandVector &Operands); OperandMatchResultTy tryParseGPR64sp0Operand(OperandVector &Operands); bool tryParseVectorRegister(OperandVector &Operands); + OperandMatchResultTy tryParseGPRSeqPair(OperandVector &Operands); public: enum AArch64MatchResultTy { @@ -875,6 +876,16 @@ public: return Kind == k_Register && !Reg.isVector && AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum); } + bool isWSeqPair() const { + return Kind == k_Register && !Reg.isVector && + AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains( + Reg.RegNum); + } + bool isXSeqPair() const { + return Kind == k_Register && !Reg.isVector && + AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID].contains( + Reg.RegNum); + } bool isGPR64sp0() const { return Kind == k_Register && !Reg.isVector && @@ -1753,7 +1764,7 @@ static unsigned MatchRegisterName(StringRef Name); /// } static unsigned matchVectorRegName(StringRef Name) { - return StringSwitch<unsigned>(Name) + return StringSwitch<unsigned>(Name.lower()) .Case("v0", AArch64::Q0) .Case("v1", AArch64::Q1) .Case("v2", AArch64::Q2) @@ -2024,7 +2035,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { // No modifier was specified at all; this is the syntax for an ELF basic // ADRP relocation (unfortunately). 
Expr = - AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_ABS_PAGE, getContext()); + AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS_PAGE, getContext()); } else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE || DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) && Addend != 0) { @@ -2157,7 +2168,7 @@ AArch64AsmParser::tryParseAddSubImm(OperandVector &Operands) { if (MCE) { int64_t Val = MCE->getValue(); if (Val > 0xfff && (Val & 0xfff) == 0) { - Imm = MCConstantExpr::Create(Val >> 12, getContext()); + Imm = MCConstantExpr::create(Val >> 12, getContext()); ShiftAmount = 12; } } @@ -2347,14 +2358,14 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, #define SYS_ALIAS(op1, Cn, Cm, op2) \ do { \ - Expr = MCConstantExpr::Create(op1, getContext()); \ + Expr = MCConstantExpr::create(op1, getContext()); \ Operands.push_back( \ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ Operands.push_back( \ AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \ Operands.push_back( \ AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \ - Expr = MCConstantExpr::Create(op2, getContext()); \ + Expr = MCConstantExpr::create(op2, getContext()); \ Operands.push_back( \ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ } while (0) @@ -2835,7 +2846,7 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { return true; if (HasELFModifier) - ImmVal = AArch64MCExpr::Create(ImmVal, RefKind, getContext()); + ImmVal = AArch64MCExpr::create(ImmVal, RefKind, getContext()); return false; } @@ -3128,7 +3139,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, if (ShiftAmt <= MaxShiftAmt && Imm <= 0xFFFF) { Operands[0] = AArch64Operand::CreateToken("movz", false, Loc, Ctx); Operands.push_back(AArch64Operand::CreateImm( - MCConstantExpr::Create(Imm, Ctx), S, E, Ctx)); + MCConstantExpr::create(Imm, Ctx), S, E, Ctx)); if (ShiftAmt) Operands.push_back(AArch64Operand::CreateShiftExtend(AArch64_AM::LSL, ShiftAmt, true, S, E, Ctx)); @@ -3634,8 +3645,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, NewOp4Val = 63 - Op3Val; } - const MCExpr *NewOp3 = MCConstantExpr::Create(NewOp3Val, getContext()); - const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext()); + const MCExpr *NewOp3 = MCConstantExpr::create(NewOp3Val, getContext()); + const MCExpr *NewOp4 = MCConstantExpr::create(NewOp4Val, getContext()); Operands[0] = AArch64Operand::CreateToken( "ubfm", false, Op.getStartLoc(), getContext()); @@ -3685,8 +3696,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(WidthOp.getStartLoc(), "requested insert overflows register"); - const MCExpr *ImmRExpr = MCConstantExpr::Create(ImmR, getContext()); - const MCExpr *ImmSExpr = MCConstantExpr::Create(ImmS, getContext()); + const MCExpr *ImmRExpr = MCConstantExpr::create(ImmR, getContext()); + const MCExpr *ImmSExpr = MCConstantExpr::create(ImmS, getContext()); Operands[0] = AArch64Operand::CreateToken( "bfm", false, Op.getStartLoc(), getContext()); Operands[2] = AArch64Operand::CreateReg( @@ -3742,9 +3753,9 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, "requested insert overflows register"); const MCExpr *NewOp3 = - MCConstantExpr::Create(NewOp3Val, getContext()); + MCConstantExpr::create(NewOp3Val, getContext()); const MCExpr *NewOp4 = - MCConstantExpr::Create(NewOp4Val, getContext()); + MCConstantExpr::create(NewOp4Val, getContext()); Operands[3] = 
AArch64Operand::CreateImm( NewOp3, Op3.getStartLoc(), Op3.getEndLoc(), getContext()); Operands[4] = AArch64Operand::CreateImm( @@ -3800,7 +3811,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, "requested extract overflows register"); const MCExpr *NewOp4 = - MCConstantExpr::Create(NewOp4Val, getContext()); + MCConstantExpr::create(NewOp4Val, getContext()); Operands[4] = AArch64Operand::CreateImm( NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext()); if (Tok == "bfxil") @@ -4021,7 +4032,7 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".ltorg" || IDVal == ".pool") return parseDirectiveLtorg(Loc); if (IDVal == ".unreq") - return parseDirectiveUnreq(DirectiveID.getLoc()); + return parseDirectiveUnreq(Loc); if (!IsMachO && !IsCOFF) { if (IDVal == ".inst") @@ -4106,8 +4117,8 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { return Error(L, "expected symbol after directive"); MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); - Expr = AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_TLSDESC, getContext()); + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, getContext()); + Expr = AArch64MCExpr::create(Expr, AArch64MCExpr::VK_TLSDESC, getContext()); MCInst Inst; Inst.setOpcode(AArch64::TLSDESCCALL); @@ -4354,3 +4365,77 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, return Match_Success; return Match_InvalidOperand; } + + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) { + + SMLoc S = getLoc(); + + if (getParser().getTok().isNot(AsmToken::Identifier)) { + Error(S, "expected register"); + return MatchOperand_ParseFail; + } + + int FirstReg = tryParseRegister(); + if (FirstReg == -1) { + return MatchOperand_ParseFail; + } + const MCRegisterClass &WRegClass = + AArch64MCRegisterClasses[AArch64::GPR32RegClassID]; + const MCRegisterClass &XRegClass = + AArch64MCRegisterClasses[AArch64::GPR64RegClassID]; + + bool isXReg = XRegClass.contains(FirstReg), + isWReg = WRegClass.contains(FirstReg); + if (!isXReg && !isWReg) { + Error(S, "expected first even register of a " + "consecutive same-size even/odd register pair"); + return MatchOperand_ParseFail; + } + + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + unsigned FirstEncoding = RI->getEncodingValue(FirstReg); + + if (FirstEncoding & 0x1) { + Error(S, "expected first even register of a " + "consecutive same-size even/odd register pair"); + return MatchOperand_ParseFail; + } + + SMLoc M = getLoc(); + if (getParser().getTok().isNot(AsmToken::Comma)) { + Error(M, "expected comma"); + return MatchOperand_ParseFail; + } + // Eat the comma + getParser().Lex(); + + SMLoc E = getLoc(); + int SecondReg = tryParseRegister(); + if (SecondReg ==-1) { + return MatchOperand_ParseFail; + } + + if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 || + (isXReg && !XRegClass.contains(SecondReg)) || + (isWReg && !WRegClass.contains(SecondReg))) { + Error(E,"expected second odd register of a " + "consecutive same-size even/odd register pair"); + return MatchOperand_ParseFail; + } + + unsigned Pair = 0; + if(isXReg) { + Pair = RI->getMatchingSuperReg(FirstReg, AArch64::sube64, + &AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID]); + } else { + Pair = RI->getMatchingSuperReg(FirstReg, AArch64::sube32, + &AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID]); + } + + 
Operands.push_back(AArch64Operand::CreateReg(Pair, false, S, getLoc(), + getContext())); + + return MatchOperand_Success; +} diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index a1ed703d1bf4..359c2e734e21 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -169,6 +169,14 @@ static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm, uint64_t Addr, const void *Decoder); static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm, uint64_t Addr, const void *Decoder); +static DecodeStatus DecodeWSeqPairsClassRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Addr, + const void *Decoder); +static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Addr, + const void *Decoder); static bool Check(DecodeStatus &Out, DecodeStatus In) { switch (In) { @@ -1543,3 +1551,35 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, return Success; } + +static DecodeStatus DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst, + unsigned RegClassID, + unsigned RegNo, + uint64_t Addr, + const void *Decoder) { + // Register number must be even (see CASP instruction) + if (RegNo & 0x1) + return Fail; + + unsigned Register = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo); + Inst.addOperand(MCOperand::createReg(Register)); + return Success; +} + +static DecodeStatus DecodeWSeqPairsClassRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Addr, + const void *Decoder) { + return DecodeGPRSeqPairsClassRegisterClass(Inst, + AArch64::WSeqPairsClassRegClassID, + RegNo, Addr, Decoder); +} + +static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Addr, + const void *Decoder) { + return DecodeGPRSeqPairsClassRegisterClass(Inst, + AArch64::XSeqPairsClassRegClassID, + RegNo, Addr, Decoder); +} diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp index 07e4a45292fa..eb05ed915ddb 100644 --- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp @@ -168,11 +168,11 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); if (Variant != MCSymbolRefExpr::VK_None) - Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx); + Add = MCSymbolRefExpr::create(Sym, Variant, Ctx); else - Add = MCSymbolRefExpr::Create(Sym, Ctx); + Add = MCSymbolRefExpr::create(Sym, Ctx); } else { - Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, Ctx); + Add = MCConstantExpr::create(SymbolicOp.AddSymbol.Value, Ctx); } } @@ -181,37 +181,37 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( if (SymbolicOp.SubtractSymbol.Name) { StringRef Name(SymbolicOp.SubtractSymbol.Name); MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); - Sub = MCSymbolRefExpr::Create(Sym, Ctx); + Sub = MCSymbolRefExpr::create(Sym, Ctx); } else { - Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx); + Sub = MCConstantExpr::create(SymbolicOp.SubtractSymbol.Value, Ctx); } } const MCExpr *Off = nullptr; if (SymbolicOp.Value != 0) - Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx); + Off = MCConstantExpr::create(SymbolicOp.Value, Ctx); const MCExpr *Expr; if (Sub) { const MCExpr 
*LHS; if (Add) - LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx); + LHS = MCBinaryExpr::createSub(Add, Sub, Ctx); else - LHS = MCUnaryExpr::CreateMinus(Sub, Ctx); + LHS = MCUnaryExpr::createMinus(Sub, Ctx); if (Off) - Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx); + Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx); else Expr = LHS; } else if (Add) { if (Off) - Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx); + Expr = MCBinaryExpr::createAdd(Add, Off, Ctx); else Expr = Add; } else { if (Off) Expr = Off; else - Expr = MCConstantExpr::Create(0, Ctx); + Expr = MCConstantExpr::create(0, Ctx); } MI.addOperand(MCOperand::createExpr(Expr)); diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt index 62827e8f50eb..73665eb5701a 100644 --- a/lib/Target/AArch64/Disassembler/LLVMBuild.txt +++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = AArch64Disassembler parent = AArch64 -required_libraries = AArch64Info AArch64Utils MC MCDisassembler Support +required_libraries = AArch64Desc AArch64Info AArch64Utils MC MCDisassembler Support add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 02bd929dc65d..96fbe3a9af4d 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -206,15 +206,15 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, else O << "\tmovn\t"; - O << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(1).getExpr(); + O << getRegisterName(MI->getOperand(0).getReg()) << ", #"; + MI->getOperand(1).getExpr()->print(O, &MAI); return; } if ((Opcode == AArch64::MOVKXi || Opcode == AArch64::MOVKWi) && MI->getOperand(2).isExpr()) { - O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(2).getExpr(); + O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"; + MI->getOperand(2).getExpr()->print(O, &MAI); return; } @@ -908,7 +908,7 @@ void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << '#' << Op.getImm(); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } @@ -966,7 +966,7 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, *CommentStream << '=' << (Val << Shift) << '\n'; } else { assert(MO.isExpr() && "Unexpected operand type!"); - O << *MO.getExpr(); + MO.getExpr()->print(O, &MAI); printShifter(MI, OpNum + 1, STI, O); } } @@ -1091,7 +1091,7 @@ void AArch64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum, O << "#" << (MO.getImm() * Scale); } else { assert(MO.isExpr() && "Unexpected operand type!"); - O << *MO.getExpr(); + MO.getExpr()->print(O, &MAI); } } @@ -1103,7 +1103,8 @@ void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum, O << ", #" << (MO1.getImm() * Scale); } else { assert(MO1.isExpr() && "Unexpected operand type!"); - O << ", " << *MO1.getExpr(); + O << ", "; + MO1.getExpr()->print(O, &MAI); } O << ']'; } @@ -1113,7 +1114,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned prfop = MI->getOperand(OpNum).getImm(); bool Valid; - StringRef Name = + StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, STI.getFeatureBits(), Valid); if (Valid) O << Name; @@ -1177,6 +1178,23 @@ static unsigned 
getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { return Reg; } +template<unsigned size> +void AArch64InstPrinter::printGPRSeqPairsClassOperand(const MCInst *MI, + unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + static_assert(size == 64 || size == 32, + "Template parameter must be either 32 or 64"); + unsigned Reg = MI->getOperand(OpNum).getReg(); + + unsigned Sube = (size == 32) ? AArch64::sube32 : AArch64::sube64; + unsigned Subo = (size == 32) ? AArch64::subo32 : AArch64::subo64; + + unsigned Even = MRI.getSubReg(Reg, Sube); + unsigned Odd = MRI.getSubReg(Reg, Subo); + O << getRegisterName(Even) << ", " << getRegisterName(Odd); +} + void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O, @@ -1264,12 +1282,12 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(MI->getOperand(OpNum).getExpr()); int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) { O << "0x"; O.write_hex(Address); } else { // Otherwise, just print the expression. - O << *MI->getOperand(OpNum).getExpr(); + MI->getOperand(OpNum).getExpr()->print(O, &MAI); } } @@ -1286,7 +1304,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, } // Otherwise, just print the expression. - O << *MI->getOperand(OpNum).getExpr(); + MI->getOperand(OpNum).getExpr()->print(O, &MAI); } void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, @@ -1298,10 +1316,10 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, bool Valid; StringRef Name; if (Opcode == AArch64::ISB) - Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(), + Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(), Valid); else - Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(), + Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(), Valid); if (Valid) O << Name; @@ -1337,7 +1355,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, unsigned Val = MI->getOperand(OpNo).getImm(); bool Valid; - StringRef Name = + StringRef Name = AArch64PState::PStateMapper().toString(Val, STI.getFeatureBits(), Valid); if (Valid) O << StringRef(Name.str()).upper(); diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index c2077a0fe557..15dee978e229 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -153,6 +153,10 @@ protected: const MCSubtargetInfo &STI, raw_ostream &O); void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template<unsigned size> + void printGPRSeqPairsClassOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O); }; class AArch64AppleInstPrinter : public AArch64InstPrinter { diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt index 573fa10561cf..642c18394a67 100644 --- a/lib/Target/AArch64/LLVMBuild.txt +++ b/lib/Target/AArch64/LLVMBuild.txt @@ -31,5 +31,5 @@ has_jit = 1 type = Library name = AArch64CodeGen parent = AArch64 -required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target +required_libraries = 
AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target add_to_library_groups = AArch64 diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 31fceb653a12..6c15bf3afb2d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -252,7 +252,7 @@ bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { // We are properly aligned, so write NOPs as requested. Count /= 4; for (uint64_t i = 0; i != Count; ++i) - OW->Write32(0xd503201f); + OW->write32(0xd503201f); return true; } @@ -496,7 +496,7 @@ void ELFAArch64AsmBackend::processFixupValue( // FIXME: Should be replaced with something more principled. static bool isByteSwappedFixup(const MCExpr *E) { MCValue Val; - if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr)) + if (!E->evaluateAsRelocatable(Val, nullptr, nullptr)) return false; if (!Val.getSymA() || Val.getSymA()->getSymbol().isUndefined()) diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 204a1abe72b5..78837de18b97 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -23,16 +23,14 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFStreamer.h" -#include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" @@ -161,18 +159,18 @@ private: MCSymbol *Start = getContext().createTempSymbol(); EmitLabel(Start); - MCSymbol *Symbol = getContext().getOrCreateSymbol( - Name + "." + Twine(MappingSymbolCounter++)); + auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol( + Name + "." + Twine(MappingSymbolCounter++))); - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - MCELF::SetType(SD, ELF::STT_NOTYPE); - MCELF::SetBinding(SD, ELF::STB_LOCAL); - SD.setExternal(false); + getAssembler().registerSymbol(*Symbol); + Symbol->setType(ELF::STT_NOTYPE); + Symbol->setBinding(ELF::STB_LOCAL); + Symbol->setExternal(false); auto Sec = getCurrentSection().first; assert(Sec && "need a section"); Symbol->setSection(*Sec); - const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); + const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext()); Symbol->setVariableValue(Value); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index ab2cad6547fa..921c4b94a729 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -62,15 +62,14 @@ const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol( // version. 
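One pattern worth calling out in the ELF streamer hunk above: per-symbol ELF attributes that previously lived in a detached MCSymbolData record, mutated through MCELF::SetType and friends, now live directly on the MCSymbolELF subclass. A toy model of the before/after shape, using invented Symbol/ELFSymbol types rather than the real MC classes:

#include <cassert>
#include <string>
#include <utility>

struct Symbol {                  // stand-in for MCSymbol
  std::string Name;
  explicit Symbol(std::string N) : Name(std::move(N)) {}
  virtual ~Symbol() = default;
};

struct ELFSymbol : Symbol {      // stand-in for MCSymbolELF
  unsigned Type = 0, Binding = 0;
  bool External = true;
  using Symbol::Symbol;
  void setType(unsigned T) { Type = T; }       // was MCELF::SetType(SD, T)
  void setBinding(unsigned B) { Binding = B; } // was MCELF::SetBinding(SD, B)
  void setExternal(bool E) { External = E; }
};

int main() {
  ELFSymbol Sym("$x.0");         // an AArch64 mapping symbol
  Sym.setType(0);                // ELF::STT_NOTYPE
  Sym.setBinding(0);             // ELF::STB_LOCAL
  Sym.setExternal(false);
  assert(!Sym.External && Sym.Type == 0 && Sym.Binding == 0);
  return 0;
}

Folding the side table into the symbol removes a parallel data structure the assembler had to keep in sync, which is why several hunks in this import drop MCELF.h and MCELFSymbolFlags.h includes entirely.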
MCContext &Context = Streamer.getContext(); const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, Context); MCSymbol *PCSym = Context.createTempSymbol(); Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context); - return MCBinaryExpr::CreateSub(Res, PC, Context); + const MCExpr *PC = MCSymbolRefExpr::create(PCSym, Context); + return MCBinaryExpr::createSub(Res, PC, Context); } -AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) { - Triple T(TT); +AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) { if (T.getArch() == Triple::aarch64_be) IsLittleEndian = false; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 9b88de7dabbc..253cd30f26ee 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -18,9 +18,10 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { -class Target; -class StringRef; class MCStreamer; +class Target; +class Triple; + struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin { explicit AArch64MCAsmInfoDarwin(); const MCExpr * @@ -29,7 +30,7 @@ struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin { }; struct AArch64MCAsmInfoELF : public MCAsmInfoELF { - explicit AArch64MCAsmInfoELF(StringRef TT); + explicit AArch64MCAsmInfoELF(const Triple &T); }; } // namespace llvm diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 277ea9fbace2..7d8e79bc63c8 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -174,16 +175,6 @@ public: unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue, const MCSubtargetInfo &STI) const; - void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; } - - void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const { - // Output the constant in little endian byte order. - for (unsigned i = 0; i != Size; ++i) { - EmitByte(Val & 255, OS); - Val >>= 8; - } - } - void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; @@ -611,7 +602,7 @@ void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, } uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); - EmitConstant(Binary, 4, OS); + support::endian::Writer<support::little>(OS).write<uint32_t>(Binary); ++MCNumEmitted; // Keep track of the # of mi's emitted. 
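The code-emitter hunk just above swaps a hand-rolled little-endian byte loop for a single support::endian::Writer<support::little> write. Both spellings produce the same byte sequence; the self-contained sketch below replays the old loop on the AArch64 NOP encoding that also appears in the writeNopData hunk (writeLE32 is illustrative, not the LLVM helper):

#include <cstdint>
#include <cstdio>

// The old EmitConstant loop: emit Val in little-endian byte order.
static void writeLE32(uint32_t Val, unsigned char Out[4]) {
  for (unsigned i = 0; i != 4; ++i) {
    Out[i] = Val & 0xff;
    Val >>= 8;
  }
}

int main() {
  unsigned char Buf[4];
  writeLE32(0xd503201f, Buf);    // AArch64 NOP encoding
  std::printf("%02x %02x %02x %02x\n", Buf[0], Buf[1], Buf[2], Buf[3]);
  // prints: 1f 20 03 d5 -- the same bytes endian::Writer<little> emits
  return 0;
}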
} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index 74b81af2cb4d..28703419514a 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -15,9 +15,8 @@ #include "AArch64MCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -26,7 +25,7 @@ using namespace llvm; #define DEBUG_TYPE "aarch64symbolrefexpr" -const AArch64MCExpr *AArch64MCExpr::Create(const MCExpr *Expr, VariantKind Kind, +const AArch64MCExpr *AArch64MCExpr::create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx) { return new (Ctx) AArch64MCExpr(Expr, Kind); } @@ -76,24 +75,24 @@ StringRef AArch64MCExpr::getVariantKindName() const { } } -void AArch64MCExpr::PrintImpl(raw_ostream &OS) const { +void AArch64MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { if (getKind() != VK_NONE) OS << getVariantKindName(); - OS << *Expr; + Expr->print(OS, MAI); } void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const { Streamer.visitUsedExpr(*getSubExpr()); } -MCSection *AArch64MCExpr::FindAssociatedSection() const { +MCSection *AArch64MCExpr::findAssociatedSection() const { llvm_unreachable("FIXME: what goes here?"); } -bool AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, +bool AArch64MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const { - if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup)) + if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup)) return false; Res = @@ -121,8 +120,7 @@ static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { // We're known to be under a TLS fixup, so any symbol should be // modified. There should be only one. const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); - MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); - MCELF::SetType(SD, ELF::STT_TLS); + cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS); break; } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index 95d22775736c..1165314e4105 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -112,7 +112,7 @@ public: /// @name Construction /// @{ - static const AArch64MCExpr *Create(const MCExpr *Expr, VariantKind Kind, + static const AArch64MCExpr *create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx); /// @} @@ -145,13 +145,13 @@ public: /// (e.g. ":got:", ":lo12:"). 
StringRef getVariantKindName() const; - void PrintImpl(raw_ostream &OS) const override; + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *FindAssociatedSection() const override; + MCSection *findAssociatedSection() const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 2e22de08537b..f89a85273872 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -58,15 +58,13 @@ static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { } static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { - Triple TheTriple(TT); - + const Triple &TheTriple) { MCAsmInfo *MAI; if (TheTriple.isOSDarwin()) MAI = new AArch64MCAsmInfoDarwin(); else { assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); - MAI = new AArch64MCAsmInfoELF(TT); + MAI = new AArch64MCAsmInfoELF(TheTriple); } // Initial state of the frame pointer is SP. diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index d425975e7cb0..67af810bbbec 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -31,10 +31,9 @@ class AArch64MachObjectWriter : public MCMachObjectTargetWriter { public: AArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype) - : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype, - /*UseAggressiveSymbolFolding=*/true) {} + : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype) {} - void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; @@ -140,7 +139,7 @@ static bool canUseLocalRelocation(const MCSectionMachO &Section, return false; } -void AArch64MachObjectWriter::RecordRelocation( +void AArch64MachObjectWriter::recordRelocation( MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { @@ -209,11 +208,9 @@ void AArch64MachObjectWriter::RecordRelocation( } } else if (Target.getSymB()) { // A - B + constant const MCSymbol *A = &Target.getSymA()->getSymbol(); - const MCSymbolData &A_SD = Asm.getSymbolData(*A); const MCSymbol *A_Base = Asm.getAtom(*A); const MCSymbol *B = &Target.getSymB()->getSymbol(); - const MCSymbolData &B_SD = Asm.getSymbolData(*B); const MCSymbol *B_Base = Asm.getAtom(*B); // Check for "_foo@got - .", which comes through here as: @@ -264,14 +261,12 @@ void AArch64MachObjectWriter::RecordRelocation( Asm.getContext().reportFatalError(Fixup.getLoc(), "unsupported relocation with identical base"); - Value += (!A_SD.getFragment() ? 0 : Writer->getSymbolAddress(*A, Layout)) - - (!A_Base || !A_Base->getData().getFragment() - ? 0 - : Writer->getSymbolAddress(*A_Base, Layout)); - Value -= (!B_SD.getFragment() ? 0 : Writer->getSymbolAddress(*B, Layout)) - - (!B_Base || !B_Base->getData().getFragment() - ? 
0 - : Writer->getSymbolAddress(*B_Base, Layout)); + Value += (!A->getFragment() ? 0 : Writer->getSymbolAddress(*A, Layout)) - + (!A_Base || !A_Base->getFragment() ? 0 : Writer->getSymbolAddress( + *A_Base, Layout)); + Value -= (!B->getFragment() ? 0 : Writer->getSymbolAddress(*B, Layout)) - + (!B_Base || !B_Base->getFragment() ? 0 : Writer->getSymbolAddress( + *B_Base, Layout)); Type = MachO::ARM64_RELOC_UNSIGNED; @@ -304,7 +299,7 @@ void AArch64MachObjectWriter::RecordRelocation( // If the evaluation is an absolute value, just use that directly // to keep things easy. int64_t Res; - if (Symbol->getVariableValue()->EvaluateAsAbsolute( + if (Symbol->getVariableValue()->evaluateAsAbsolute( Res, Layout, Writer->getSectionAddressMap())) { FixedValue = Res; return; @@ -313,12 +308,12 @@ void AArch64MachObjectWriter::RecordRelocation( // FIXME: Will the Target we already have ever have any data in it // we need to preserve and merge with the new Target? How about // the FixedValue? - if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout, + if (!Symbol->getVariableValue()->evaluateAsRelocatable(Target, &Layout, &Fixup)) Asm.getContext().reportFatalError(Fixup.getLoc(), "unable to resolve variable '" + Symbol->getName() + "'"); - return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + return recordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); } @@ -360,7 +355,7 @@ void AArch64MachObjectWriter::RecordRelocation( // Resolve constant variables. if (Symbol->isVariable()) { int64_t Res; - if (Symbol->getVariableValue()->EvaluateAsAbsolute( + if (Symbol->getVariableValue()->evaluateAsAbsolute( Res, Layout, Writer->getSectionAddressMap())) { FixedValue = Res; return; diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 28b8e7e29fe2..ee85b65bf39a 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -175,6 +175,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { {"id_mmfr1_el1", ID_MMFR1_EL1, {}}, {"id_mmfr2_el1", ID_MMFR2_EL1, {}}, {"id_mmfr3_el1", ID_MMFR3_EL1, {}}, + {"id_mmfr4_el1", ID_MMFR4_EL1, {}}, {"id_isar0_el1", ID_ISAR0_EL1, {}}, {"id_isar1_el1", ID_ISAR1_EL1, {}}, {"id_isar2_el1", ID_ISAR2_EL1, {}}, diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 7125f14f1a2d..7e42f8e3601e 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -603,6 +603,7 @@ namespace AArch64SysReg { ISR_EL1 = 0xc608, // 11 000 1100 0001 000 CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001 CNTVCT_EL0 = 0xdf02, // 11 011 1110 0000 010 + ID_MMFR4_EL1 = 0xc016, // 11 000 0000 0010 110 // Trace registers TRCSTATR = 0x8818, // 10 001 0000 0011 000 diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index d3cc068993e0..9550a3a3cad1 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -16,11 +16,13 @@ #define LLVM_LIB_TARGET_ARM_ARM_H #include "llvm/Support/CodeGen.h" +#include <functional> namespace llvm { class ARMAsmPrinter; class ARMBaseTargetMachine; +class Function; class FunctionPass; class ImmutablePass; class MachineInstr; @@ -38,7 +40,8 @@ FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createARMOptimizeBarriersPass(); -FunctionPass *createThumb2SizeReductionPass(); +FunctionPass 
*createThumb2SizeReductionPass( + std::function<bool(const Function &)> Ftor = nullptr); void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 04503b89de73..d84f2961d810 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -87,7 +87,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts()); assert(GV && "C++ constructor pointer was not a GlobalValue!"); - const MCExpr *E = MCSymbolRefExpr::Create(GetARMGVSymbol(GV, + const MCExpr *E = MCSymbolRefExpr::create(GetARMGVSymbol(GV, ARMII::MO_NO_FLAG), (Subtarget->isTargetELF() ? MCSymbolRefExpr::VK_ARM_TARGET1 @@ -173,7 +173,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, break; } case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); @@ -181,7 +181,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << ":lower16:"; else if (TF & ARMII::MO_HI16) O << ":upper16:"; - O << *GetARMGVSymbol(GV, TF); + GetARMGVSymbol(GV, TF)->print(O, MAI); printOffset(MO.getOffset(), O); if (TF == ARMII::MO_PLT) @@ -189,7 +189,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, break; } case MachineOperand::MO_ConstantPoolIndex: - O << *GetCPISymbol(MO.getIndex()); + GetCPISymbol(MO.getIndex())->print(O, MAI); break; } } @@ -467,7 +467,7 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel, // using NLPs; however, sometimes the types are local to the file. // We need to fill in the value for the NLP in those cases. OutStreamer.EmitValue( - MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()), + MCSymbolRefExpr::create(MCSym.getPointer(), OutStreamer.getContext()), 4 /*size*/); } @@ -640,9 +640,13 @@ void ARMAsmPrinter::emitAttributes() { if (STI.hasFPARMv8()) // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one // FPU, but there are two different names for it depending on the CPU. - ATS.emitFPU(STI.hasD16() ? ARM::FK_FPV5_D16 : ARM::FK_FP_ARMV8); + ATS.emitFPU(STI.hasD16() + ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16) + : ARM::FK_FP_ARMV8); else if (STI.hasVFP4()) - ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV4_D16 : ARM::FK_VFPV4); + ATS.emitFPU(STI.hasD16() + ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16) + : ARM::FK_VFPV4); else if (STI.hasVFP3()) ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3); else if (STI.hasVFP2()) @@ -895,7 +899,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { // Create an MCSymbol for the reference. 
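The emitAttributes change above grows a second dimension: besides hasD16, the printer now checks isFPOnlySP so that single-precision-only FPUs advertise FK_FPV5_SP_D16 / FK_FPV4_SP_D16 rather than the double-precision names. A minimal restatement of the FPv5 branch, with a local enum standing in for the ARM::FK_* constants:

#include <cstdio>

enum FPUKind { FP_ARMV8, FPV5_D16, FPV5_SP_D16 };

// Mirrors the hasFPARMv8 arm of the attribute-selection logic above.
static FPUKind selectFPv5Attr(bool HasD16, bool FPOnlySP) {
  if (!HasD16)
    return FP_ARMV8;             // full FP-ARMv8 register file
  return FPOnlySP ? FPV5_SP_D16  // 16 D-regs, single precision only
                  : FPV5_D16;    // 16 D-regs, single and double
}

int main() {
  std::printf("%d %d %d\n",
              selectFPv5Attr(false, false), // FP_ARMV8
              selectFPv5Attr(true, false),  // FPV5_D16
              selectFPv5Attr(true, true));  // FPV5_SP_D16
  return 0;
}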
const MCExpr *Expr = - MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()), + MCSymbolRefExpr::create(MCSym, getModifierVariantKind(ACPV->getModifier()), OutContext); if (ACPV->getPCAdjustment()) { @@ -903,10 +907,10 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { getFunctionNumber(), ACPV->getLabelId(), OutContext); - const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext); + const MCExpr *PCRelExpr = MCSymbolRefExpr::create(PCLabel, OutContext); PCRelExpr = - MCBinaryExpr::CreateAdd(PCRelExpr, - MCConstantExpr::Create(ACPV->getPCAdjustment(), + MCBinaryExpr::createAdd(PCRelExpr, + MCConstantExpr::create(ACPV->getPCAdjustment(), OutContext), OutContext); if (ACPV->mustAddCurrentAddress()) { @@ -914,25 +918,22 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { // label, so just emit a local label end reference that instead. MCSymbol *DotSym = OutContext.createTempSymbol(); OutStreamer->EmitLabel(DotSym); - const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); - PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext); + const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext); + PCRelExpr = MCBinaryExpr::createSub(PCRelExpr, DotExpr, OutContext); } - Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext); + Expr = MCBinaryExpr::createSub(Expr, PCRelExpr, OutContext); } OutStreamer->EmitValue(Expr, Size); } -void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { - unsigned Opcode = MI->getOpcode(); - int OpNum = 1; - if (Opcode == ARM::BR_JTadd) - OpNum = 2; - else if (Opcode == ARM::BR_JTm) - OpNum = 3; - - const MachineOperand &MO1 = MI->getOperand(OpNum); +void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) { + const MachineOperand &MO1 = MI->getOperand(1); unsigned JTI = MO1.getIndex(); + // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for + // ARM mode tables. + EmitAlignment(2); + // Emit a label for the jump table. MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); OutStreamer->EmitLabel(JTISymbol); @@ -955,16 +956,16 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { // LJTI_0_0: // .word (LBB0 - LJTI_0_0) // .word (LBB1 - LJTI_0_0) - const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); + const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); if (TM.getRelocationModel() == Reloc::PIC_) - Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol, + Expr = MCBinaryExpr::createSub(Expr, MCSymbolRefExpr::create(JTISymbol, OutContext), OutContext); // If we're generating a table of Thumb addresses in static relocation // model, we need to add one to keep interworking correctly. else if (AFI->isThumbFunction()) - Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(1,OutContext), + Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(1,OutContext), OutContext); OutStreamer->EmitValue(Expr, 4); } @@ -972,10 +973,8 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); } -void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { - unsigned Opcode = MI->getOpcode(); - int OpNum = (Opcode == ARM::t2BR_JT) ? 
2 : 1; - const MachineOperand &MO1 = MI->getOperand(OpNum); +void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) { + const MachineOperand &MO1 = MI->getOperand(1); unsigned JTI = MO1.getIndex(); MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); @@ -985,51 +984,67 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; - unsigned OffsetWidth = 4; - if (MI->getOpcode() == ARM::t2TBB_JT) { - OffsetWidth = 1; - // Mark the jump table as data-in-code. - OutStreamer->EmitDataRegion(MCDR_DataRegionJT8); - } else if (MI->getOpcode() == ARM::t2TBH_JT) { - OffsetWidth = 2; - // Mark the jump table as data-in-code. - OutStreamer->EmitDataRegion(MCDR_DataRegionJT16); - } for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { MachineBasicBlock *MBB = JTBBs[i]; - const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(), + const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); // If this isn't a TBB or TBH, the entries are direct branch instructions. - if (OffsetWidth == 4) { - EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B) + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B) .addExpr(MBBSymbolExpr) .addImm(ARMCC::AL) .addReg(0)); - continue; - } + } +} + +void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI, + unsigned OffsetWidth) { + assert((OffsetWidth == 1 || OffsetWidth == 2) && "invalid tbb/tbh width"); + const MachineOperand &MO1 = MI->getOperand(1); + unsigned JTI = MO1.getIndex(); + + MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); + OutStreamer->EmitLabel(JTISymbol); + + // Emit each entry of the table. + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + + // Mark the jump table as data-in-code. + OutStreamer->EmitDataRegion(OffsetWidth == 1 ? MCDR_DataRegionJT8 + : MCDR_DataRegionJT16); + + for (auto MBB : JTBBs) { + const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(), + OutContext); // Otherwise it's an offset from the dispatch instruction. Construct an // MCExpr for the entry. We want a value of the form: - // (BasicBlockAddr - TableBeginAddr) / 2 + // (BasicBlockAddr - TBBInstAddr + 4) / 2 // // For example, a TBB table with entries jumping to basic blocks BB0 and BB1 // would look like: // LJTI_0_0: - // .byte (LBB0 - LJTI_0_0) / 2 - // .byte (LBB1 - LJTI_0_0) / 2 - const MCExpr *Expr = - MCBinaryExpr::CreateSub(MBBSymbolExpr, - MCSymbolRefExpr::Create(JTISymbol, OutContext), - OutContext); - Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext), + // .byte (LBB0 - (LCPI0_0 + 4)) / 2 + // .byte (LBB1 - (LCPI0_0 + 4)) / 2 + // where LCPI0_0 is a label defined just before the TBB instruction using + // this table. + MCSymbol *TBInstPC = GetCPISymbol(MI->getOperand(0).getImm()); + const MCExpr *Expr = MCBinaryExpr::createAdd( + MCSymbolRefExpr::create(TBInstPC, OutContext), + MCConstantExpr::create(4, OutContext), OutContext); + Expr = MCBinaryExpr::createSub(MBBSymbolExpr, Expr, OutContext); + Expr = MCBinaryExpr::createDiv(Expr, MCConstantExpr::create(2, OutContext), OutContext); OutStreamer->EmitValue(Expr, OffsetWidth); } // Mark the end of jump table data-in-code region. 
32-bit offsets use // actual branch instructions here, so we don't mark those as a data-region // at all. - if (OffsetWidth != 4) - OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); + OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); + + // Make sure the next instruction is 2-byte aligned. + EmitAlignment(1); } void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { @@ -1212,7 +1227,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR : ARM::ADR)) .addReg(MI->getOperand(0).getReg()) - .addExpr(MCSymbolRefExpr::Create(CPISymbol, OutContext)) + .addExpr(MCSymbolRefExpr::create(CPISymbol, OutContext)) // Add predicate operands. .addImm(MI->getOperand(2).getImm()) .addReg(MI->getOperand(3).getReg())); @@ -1228,7 +1243,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR : ARM::ADR)) .addReg(MI->getOperand(0).getReg()) - .addExpr(MCSymbolRefExpr::Create(JTIPICSymbol, OutContext)) + .addExpr(MCSymbolRefExpr::create(JTIPICSymbol, OutContext)) // Add predicate operands. .addImm(MI->getOperand(2).getImm()) .addReg(MI->getOperand(3).getReg())); @@ -1278,7 +1293,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBL) // Predicate comes first here. .addImm(ARMCC::AL).addReg(0) - .addExpr(MCSymbolRefExpr::Create(TRegSym, OutContext))); + .addExpr(MCSymbolRefExpr::create(TRegSym, OutContext))); return; } case ARM::BMOVPCRX_CALL: { @@ -1315,7 +1330,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GV = Op.getGlobal(); const unsigned TF = Op.getTargetFlags(); MCSymbol *GVSym = GetARMGVSymbol(GV, TF); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc) .addExpr(GVSymExpr) // Add predicate operands. @@ -1332,17 +1347,17 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned TF = MI->getOperand(1).getTargetFlags(); const GlobalValue *GV = MI->getOperand(1).getGlobal(); MCSymbol *GVSym = GetARMGVSymbol(GV, TF); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext); MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(), getFunctionNumber(), MI->getOperand(2).getImm(), OutContext); - const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext); unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 
8 : 4; const MCExpr *PCRelExpr = - ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr, - MCBinaryExpr::CreateAdd(LabelSymExpr, - MCConstantExpr::Create(PCAdj, OutContext), + ARMMCExpr::createLower16(MCBinaryExpr::createSub(GVSymExpr, + MCBinaryExpr::createAdd(LabelSymExpr, + MCConstantExpr::create(PCAdj, OutContext), OutContext), OutContext), OutContext); TmpInst.addOperand(MCOperand::createExpr(PCRelExpr)); @@ -1365,17 +1380,17 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned TF = MI->getOperand(2).getTargetFlags(); const GlobalValue *GV = MI->getOperand(2).getGlobal(); MCSymbol *GVSym = GetARMGVSymbol(GV, TF); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext); MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(), getFunctionNumber(), MI->getOperand(3).getImm(), OutContext); - const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext); unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4; const MCExpr *PCRelExpr = - ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr, - MCBinaryExpr::CreateAdd(LabelSymExpr, - MCConstantExpr::Create(PCAdj, OutContext), + ARMMCExpr::createUpper16(MCBinaryExpr::createSub(GVSymExpr, + MCBinaryExpr::createAdd(LabelSymExpr, + MCConstantExpr::create(PCAdj, OutContext), OutContext), OutContext), OutContext); TmpInst.addOperand(MCOperand::createExpr(PCRelExpr)); // Add predicate operands. @@ -1501,6 +1516,16 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitGlobalConstant(MCPE.Val.ConstVal); return; } + case ARM::JUMPTABLE_ADDRS: + EmitJumpTableAddrs(MI); + return; + case ARM::JUMPTABLE_INSTS: + EmitJumpTableInsts(MI); + return; + case ARM::JUMPTABLE_TBB: + case ARM::JUMPTABLE_TBH: + EmitJumpTableTBInst(MI, MI->getOpcode() == ARM::JUMPTABLE_TBB ? 1 : 2); + return; case ARM::t2BR_JT: { // Lower and emit the instruction itself, then the jump table following it. EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr) @@ -1509,37 +1534,19 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Add predicate operands. .addImm(ARMCC::AL) .addReg(0)); - - // Output the data for the jump table itself - EmitJump2Table(MI); - return; - } - case ARM::t2TBB_JT: { - // Lower and emit the instruction itself, then the jump table following it. - EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBB) - .addReg(ARM::PC) - .addReg(MI->getOperand(0).getReg()) - // Add predicate operands. - .addImm(ARMCC::AL) - .addReg(0)); - - // Output the data for the jump table itself - EmitJump2Table(MI); - // Make sure the next instruction is 2-byte aligned. - EmitAlignment(1); return; } + case ARM::t2TBB_JT: case ARM::t2TBH_JT: { - // Lower and emit the instruction itself, then the jump table following it. - EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBH) - .addReg(ARM::PC) - .addReg(MI->getOperand(0).getReg()) - // Add predicate operands. - .addImm(ARMCC::AL) - .addReg(0)); - - // Output the data for the jump table itself - EmitJump2Table(MI); + unsigned Opc = MI->getOpcode() == ARM::t2TBB_JT ? ARM::t2TBB : ARM::t2TBH; + // Lower and emit the PC label, then the instruction itself. + OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm())); + EmitToStreamer(*OutStreamer, MCInstBuilder(Opc) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + // Add predicate operands. 
+ .addImm(ARMCC::AL) + .addReg(0)); return; } case ARM::tBR_JTr: @@ -1559,13 +1566,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (Opc == ARM::MOVr) TmpInst.addOperand(MCOperand::createReg(0)); EmitToStreamer(*OutStreamer, TmpInst); - - // Make sure the Thumb jump table is 4-byte aligned. - if (Opc == ARM::tMOVr) - EmitAlignment(2); - - // Output the data for the jump table itself - EmitJumpTable(MI); return; } case ARM::BR_JTm: { @@ -1589,9 +1589,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::createImm(ARMCC::AL)); TmpInst.addOperand(MCOperand::createReg(0)); EmitToStreamer(*OutStreamer, TmpInst); - - // Output the data for the jump table itself - EmitJumpTable(MI); return; } case ARM::BR_JTadd: { @@ -1606,9 +1603,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addReg(0) // Add 's' bit operand (always reg0 for this) .addReg(0)); - - // Output the data for the jump table itself - EmitJumpTable(MI); return; } case ARM::SPACE: @@ -1695,7 +1689,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext); + const MCExpr *SymbolExpr = MCSymbolRefExpr::create(Label, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tB) .addExpr(SymbolExpr) .addImm(ARMCC::AL) diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 7bfb9447818e..a6bc3683c8b9 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -71,8 +71,9 @@ public: void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, const MCSubtargetInfo *EndInfo) const override; - void EmitJumpTable(const MachineInstr *MI); - void EmitJump2Table(const MachineInstr *MI); + void EmitJumpTableAddrs(const MachineInstr *MI); + void EmitJumpTableInsts(const MachineInstr *MI); + void EmitJumpTableTBInst(const MachineInstr *MI, unsigned OffsetWidth); void EmitInstruction(const MachineInstr *MI) override; bool runOnMachineFunction(MachineFunction &F) override; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index c5d6b258240a..9c4b4961fe8c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -627,6 +627,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::t2MOVi32imm: return 8; case ARM::CONSTPOOL_ENTRY: + case ARM::JUMPTABLE_INSTS: + case ARM::JUMPTABLE_ADDRS: + case ARM::JUMPTABLE_TBB: + case ARM::JUMPTABLE_TBH: // If this machine instr is a constant pool entry, its size is recorded as // operand #2. return MI->getOperand(2).getImm(); @@ -641,42 +645,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::t2Int_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp_nofp: return 12; - case ARM::BR_JTr: - case ARM::BR_JTm: - case ARM::BR_JTadd: - case ARM::tBR_JTr: - case ARM::t2BR_JT: - case ARM::t2TBB_JT: - case ARM::t2TBH_JT: { - // These are jumptable branches, i.e. a branch followed by an inlined - // jumptable. The size is 4 + 4 * number of entries. For TBB, each - // entry is one byte; TBH two byte each. - unsigned EntrySize = (Opc == ARM::t2TBB_JT) - ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); - unsigned NumOps = MCID.getNumOperands(); - MachineOperand JTOP = - MI->getOperand(NumOps - (MI->isPredicable() ? 
2 : 1)); - unsigned JTI = JTOP.getIndex(); - const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - assert(MJTI != nullptr); - const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); - assert(JTI < JT.size()); - // Thumb instructions are 2 byte aligned, but JT entries are 4 byte - // 4 aligned. The assembler / linker may add 2 byte padding just before - // the JT entries. The size does not include this padding; the - // constant islands pass does separate bookkeeping for it. - // FIXME: If we know the size of the function is less than (1 << 16) *2 - // bytes, we can use 16-bit entries instead. Then there won't be an - // alignment issue. - unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4; - unsigned NumEntries = JT[JTI].MBBs.size(); - if (Opc == ARM::t2TBB_JT && (NumEntries & 1)) - // Make sure the instruction that follows TBB is 2-byte aligned. - // FIXME: Constant island pass should insert an "ALIGN" instruction - // instead. - ++NumEntries; - return NumEntries * EntrySize + InstSize; - } case ARM::SPACE: return MI->getOperand(1).getImm(); } diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 6fa5ad7d0522..f4ec8c67c977 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -180,9 +180,7 @@ namespace { MachineInstr *MI; MachineInstr *CPEMI; MachineBasicBlock *HighWaterMark; - private: unsigned MaxDisp; - public: bool NegOk; bool IsSoImm; bool KnownAlignment; @@ -216,12 +214,24 @@ namespace { }; /// CPEntries - Keep track of all of the constant pool entry machine - /// instructions. For each original constpool index (i.e. those that - /// existed upon entry to this pass), it keeps a vector of entries. - /// Original elements are cloned as we go along; the clones are - /// put in the vector of the original element, but have distinct CPIs. + /// instructions. For each original constpool index (i.e. those that existed + /// upon entry to this pass), it keeps a vector of entries. Original + /// elements are cloned as we go along; the clones are put in the vector of + /// the original element, but have distinct CPIs. + /// + /// The first half of CPEntries contains generic constants, the second half + /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up + /// which vector it will be in here. std::vector<std::vector<CPEntry> > CPEntries; + /// Maps a JT index to the offset in CPEntries containing copies of that + /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity. + DenseMap<int, int> JumpTableEntryIndices; + + /// Maps a JT index to the LEA that actually uses the index to calculate its + /// base address. 
+ DenseMap<int, int> JumpTableUserIndices; + /// ImmBranch - One per immediate branch, keeping the machine instruction /// pointer, conditional or unconditional, the max displacement, /// and (if isCond is true) the corresponding unconditional branch @@ -269,7 +279,8 @@ namespace { } private: - void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs); + void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs); + void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs); bool BBHasFallthrough(MachineBasicBlock *MBB); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); unsigned getCPELogAlign(const MachineInstr *CPEMI); @@ -279,6 +290,7 @@ namespace { void updateForInsertedWaterBlock(MachineBasicBlock *NewBB); void adjustBBOffsetsAfter(MachineBasicBlock *BB); bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI); + unsigned getCombinedIndex(const MachineInstr *CPEMI); int findInRangeCPEntry(CPUser& U, unsigned UserOffset); bool findAvailableWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter); @@ -301,8 +313,9 @@ namespace { bool optimizeThumb2Instructions(); bool optimizeThumb2Branches(); bool reorderThumb2JumpTables(); - unsigned removeDeadDefinitions(MachineInstr *MI, unsigned BaseReg, - unsigned IdxReg); + bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI, + unsigned &DeadSize, bool &CanDeleteLEA, + bool &BaseRegKill); bool optimizeThumb2JumpTables(); MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB); @@ -413,7 +426,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // we put them all at the end of the function. std::vector<MachineInstr*> CPEMIs; if (!MCP->isEmpty()) - doInitialPlacement(CPEMIs); + doInitialConstPlacement(CPEMIs); + + if (MF->getJumpTableInfo()) + doInitialJumpTablePlacement(CPEMIs); /// The next UID to take is the first unused one. AFI->initPICLabelUId(CPEMIs.size()); @@ -478,7 +494,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) { const CPEntry & CPE = CPEntries[i][j]; - AFI->recordCPEClone(i, CPE.CPI); + if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI()) + AFI->recordCPEClone(i, CPE.CPI); } } @@ -488,6 +505,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { WaterList.clear(); CPUsers.clear(); CPEntries.clear(); + JumpTableEntryIndices.clear(); + JumpTableUserIndices.clear(); ImmBranches.clear(); PushPopMIs.clear(); T2JumpTables.clear(); @@ -495,10 +514,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { return MadeChange; } -/// doInitialPlacement - Perform the initial placement of the constant pool -/// entries. To start with, we put them all at the end of the function. +/// \brief Perform the initial placement of the regular constant pool entries. +/// To start with, we put them all at the end of the function. void -ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { +ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) { // Create the basic block to hold the CPE's. MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); MF->push_back(BB); @@ -556,6 +575,66 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { DEBUG(BB->dump()); } +/// \brief Do initial placement of the jump tables. 
Because Thumb2's TBB and TBH +/// instructions can be made more efficient if the jump table immediately +/// follows the instruction, it's best to place them immediately next to their +/// jumps to begin with. In almost all cases they'll never be moved from that +/// position. +void ARMConstantIslands::doInitialJumpTablePlacement( + std::vector<MachineInstr *> &CPEMIs) { + unsigned i = CPEntries.size(); + auto MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + + MachineBasicBlock *LastCorrectlyNumberedBB = nullptr; + for (MachineBasicBlock &MBB : *MF) { + auto MI = MBB.getLastNonDebugInstr(); + + unsigned JTOpcode; + switch (MI->getOpcode()) { + default: + continue; + case ARM::BR_JTadd: + case ARM::BR_JTr: + case ARM::tBR_JTr: + case ARM::BR_JTm: + JTOpcode = ARM::JUMPTABLE_ADDRS; + break; + case ARM::t2BR_JT: + JTOpcode = ARM::JUMPTABLE_INSTS; + break; + case ARM::t2TBB_JT: + JTOpcode = ARM::JUMPTABLE_TBB; + break; + case ARM::t2TBH_JT: + JTOpcode = ARM::JUMPTABLE_TBH; + break; + } + + unsigned NumOps = MI->getDesc().getNumOperands(); + MachineOperand JTOp = + MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1)); + unsigned JTI = JTOp.getIndex(); + unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t); + MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock(); + MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB); + MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(), + DebugLoc(), TII->get(JTOpcode)) + .addImm(i++) + .addJumpTableIndex(JTI) + .addImm(Size); + CPEMIs.push_back(CPEMI); + CPEntries.emplace_back(1, CPEntry(CPEMI, JTI)); + JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1)); + if (!LastCorrectlyNumberedBB) + LastCorrectlyNumberedBB = &MBB; + } + + // If we did anything then we need to renumber the subsequent blocks. + if (LastCorrectlyNumberedBB) + MF->RenumberBlocks(LastCorrectlyNumberedBB); +} + /// BBHasFallthrough - Return true if the specified basic block can fallthrough /// into the block immediately after it. bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) { @@ -595,9 +674,21 @@ ARMConstantIslands::CPEntry /// getCPELogAlign - Returns the required alignment of the constant pool entry /// represented by CPEMI. Alignment is measured in log2(bytes) units. unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { - assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY); + switch (CPEMI->getOpcode()) { + case ARM::CONSTPOOL_ENTRY: + break; + case ARM::JUMPTABLE_TBB: + return 0; + case ARM::JUMPTABLE_TBH: + case ARM::JUMPTABLE_INSTS: + return 1; + case ARM::JUMPTABLE_ADDRS: + return 2; + default: + llvm_unreachable("unknown constpool entry kind"); + } - unsigned CPI = CPEMI->getOperand(1).getIndex(); + unsigned CPI = getCombinedIndex(CPEMI); assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); unsigned Align = MCP->getConstants()[CPI].getAlignment(); assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); @@ -706,12 +797,14 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET) PushPopMIs.push_back(I); - if (Opc == ARM::CONSTPOOL_ENTRY) + if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS || + Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB || + Opc == ARM::JUMPTABLE_TBH) continue; // Scan the instructions for constant pool operands. 
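The placement loop in doInitialJumpTablePlacement keys the pseudo-instruction kind off the terminating branch opcode. The mapping itself is small enough to restate standalone; the enums below are stand-ins for the real ARM opcode and pseudo-opcode values:

#include <cstdio>

enum Branch { BR_JTr, BR_JTm, BR_JTadd, tBR_JTr, t2BR_JT, t2TBB_JT, t2TBH_JT };
enum Table  { JT_ADDRS, JT_INSTS, JT_TBB, JT_TBH };

static Table tableKindFor(Branch B) {
  switch (B) {
  case BR_JTr: case BR_JTm: case BR_JTadd: case tBR_JTr:
    return JT_ADDRS;   // table of 4-byte addresses
  case t2BR_JT:
    return JT_INSTS;   // table of 4-byte Thumb2 branch instructions
  case t2TBB_JT:
    return JT_TBB;     // table of 1-byte halfword offsets
  case t2TBH_JT:
    return JT_TBH;     // table of 2-byte halfword offsets
  }
  return JT_ADDRS;     // unreachable with valid input
}

int main() {
  std::printf("%d %d\n", tableKindFor(t2TBB_JT), tableKindFor(BR_JTm));
  return 0;
}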
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) - if (I->getOperand(op).isCPI()) { + if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) { // We found one. The addressing mode tells us the max displacement // from the PC that this instruction permits. @@ -727,6 +820,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { // Taking the address of a CP entry. case ARM::LEApcrel: + case ARM::LEApcrelJT: // This takes a SoImm, which is 8 bit immediate rotated. We'll // pretend the maximum offset is 255 * 4. Since each instruction // 4 byte wide, this is always correct. We'll check for other @@ -737,10 +831,12 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { IsSoImm = true; break; case ARM::t2LEApcrel: + case ARM::t2LEApcrelJT: Bits = 12; NegOk = true; break; case ARM::tLEApcrel: + case ARM::tLEApcrelJT: Bits = 8; Scale = 4; break; @@ -768,6 +864,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { // Remember that this is a user of a CP entry. unsigned CPI = I->getOperand(op).getIndex(); + if (I->getOperand(op).isJTI()) { + JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size())); + CPI = JumpTableEntryIndices[CPI]; + } + MachineInstr *CPEMI = CPEMIs[CPI]; unsigned MaxOffs = ((1 << Bits)-1) * Scale; CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm)); @@ -1101,6 +1202,13 @@ bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI, return false; } +unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) { + if (CPEMI->getOperand(1).isCPI()) + return CPEMI->getOperand(1).getIndex(); + + return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()]; +} + /// LookForCPEntryInRange - see if the currently referenced CPE is in range; /// if not, see if an in-range clone of the CPE is in range, and if so, /// change the data structures so the user references the clone. Returns: @@ -1120,7 +1228,7 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) } // No. Look for previously created clones of the CPE that are in range. - unsigned CPI = CPEMI->getOperand(1).getIndex(); + unsigned CPI = getCombinedIndex(CPEMI); std::vector<CPEntry> &CPEs = CPEntries[CPI]; for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { // We already tried this one @@ -1365,7 +1473,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; - unsigned CPI = CPEMI->getOperand(1).getIndex(); + unsigned CPI = getCombinedIndex(CPEMI); unsigned Size = CPEMI->getOperand(2).getImm(); // Compute this only once, it's expensive. unsigned UserOffset = getUserOffset(U); @@ -1429,17 +1537,17 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // Update internal data structures to account for the newly inserted MBB. updateForInsertedWaterBlock(NewIsland); - // Decrement the old entry, and remove it if refcount becomes 0. - decrementCPEReferenceCount(CPI, CPEMI); - // Now that we have an island to add the CPE to, clone the original CPE and // add it to the island. U.HighWaterMark = NewIsland; - U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) - .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); + U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc()) + .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size); CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); ++NumCPEs; + // Decrement the old entry, and remove it if refcount becomes 0. 
+ decrementCPEReferenceCount(CPI, CPEMI); + // Mark the basic block as aligned as required by the const-pool entry. NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); @@ -1844,77 +1952,120 @@ bool ARMConstantIslands::optimizeThumb2Branches() { return MadeChange; } -/// If we've formed a TBB or TBH instruction, the base register is now -/// redundant. In most cases, the instructions defining it will now be dead and -/// can be tidied up. This function removes them if so, and returns the number -/// of bytes saved. -unsigned ARMConstantIslands::removeDeadDefinitions(MachineInstr *MI, - unsigned BaseReg, - unsigned IdxReg) { - unsigned BytesRemoved = 0; - MachineBasicBlock *MBB = MI->getParent(); +static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg, + unsigned BaseReg) { + if (I.getOpcode() != ARM::t2ADDrs) + return false; - // Scan backwards to find the instruction that defines the base - // register. Due to post-RA scheduling, we can't count on it - // immediately preceding the branch instruction. - MachineBasicBlock::iterator PrevI = MI; - MachineBasicBlock::iterator B = MBB->begin(); - while (PrevI != B && !PrevI->definesRegister(BaseReg)) - --PrevI; - - // If for some reason we didn't find it, we can't do anything, so - // just skip this one. - if (!PrevI->definesRegister(BaseReg) || PrevI->hasUnmodeledSideEffects() || - PrevI->mayStore()) - return BytesRemoved; - - MachineInstr *AddrMI = PrevI; - unsigned NewBaseReg = BytesRemoved; - - // Examine the instruction that calculates the jumptable entry address. Make - // sure it only defines the base register and kills any uses other than the - // index register. We also need precisely one use to trace backwards to - // (hopefully) the LEA. - for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { - const MachineOperand &MO = AddrMI->getOperand(k); - if (!MO.isReg() || !MO.getReg()) - continue; - if (MO.isDef() && MO.getReg() != BaseReg) - return BytesRemoved; + if (I.getOperand(0).getReg() != EntryReg) + return false; - if (MO.isUse() && MO.getReg() != IdxReg) { - if (!MO.isKill() || (NewBaseReg != 0 && NewBaseReg != MO.getReg())) - return BytesRemoved; - NewBaseReg = MO.getReg(); + if (I.getOperand(1).getReg() != BaseReg) + return false; + + // FIXME: what about CC and IdxReg? + return true; +} + +/// \brief While trying to form a TBB/TBH instruction, we may (if the table +/// doesn't immediately follow the BR_JT) need access to the start of the +/// jump-table. We know one instruction that produces such a register; this +/// function works out whether that definition can be preserved to the BR_JT, +/// possibly by removing an intervening addition (which is usually needed to +/// calculate the actual entry to jump to). +bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI, + MachineInstr *LEAMI, + unsigned &DeadSize, + bool &CanDeleteLEA, + bool &BaseRegKill) { + if (JumpMI->getParent() != LEAMI->getParent()) + return false; + + // Now we hope that we have at least these instructions in the basic block: + // BaseReg = t2LEA ... + // [...] + // EntryReg = t2ADDrs BaseReg, ... + // [...] + // t2BR_JT EntryReg + // + // We have to be very conservative about what we recognise here though. The + // main perturbing factors to watch out for are: + // + Spills at any point in the chain: not direct problems but we would + // expect a blocking Def of the spilled register so in practice what we + // can do is limited. 
+  // + EntryReg == BaseReg: this is the one situation we should allow a Def
+  //   of BaseReg, but only if the t2ADDrs can be removed.
+  // + Some instruction other than t2ADDrs computing the entry. Not seen in
+  //   the wild, but we should be careful.
+  unsigned EntryReg = JumpMI->getOperand(0).getReg();
+  unsigned BaseReg = LEAMI->getOperand(0).getReg();
+
+  CanDeleteLEA = true;
+  BaseRegKill = false;
+  MachineInstr *RemovableAdd = nullptr;
+  MachineBasicBlock::iterator I(LEAMI);
+  for (++I; &*I != JumpMI; ++I) {
+    if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) {
+      RemovableAdd = &*I;
+      break;
+    }
+
+    for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
+      const MachineOperand &MO = I->getOperand(K);
+      if (!MO.isReg() || !MO.getReg())
+        continue;
+      if (MO.isDef() && MO.getReg() == BaseReg)
+        return false;
+      if (MO.isUse() && MO.getReg() == BaseReg) {
+        BaseRegKill = BaseRegKill || MO.isKill();
+        CanDeleteLEA = false;
+      }
+    }
+  }
+
+  if (!RemovableAdd)
+    return true;
+
+  // Check the add really is removable, and that nothing else in the block
+  // clobbers BaseReg.
+  for (++I; &*I != JumpMI; ++I) {
+    for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
+      const MachineOperand &MO = I->getOperand(K);
+      if (!MO.isReg() || !MO.getReg())
+        continue;
+      if (MO.isDef() && MO.getReg() == BaseReg)
+        return false;
+      if (MO.isUse() && MO.getReg() == EntryReg)
+        RemovableAdd = nullptr;
     }
   }
 
-  // Want to continue searching for AddrMI, but there are 2 problems: AddrMI is
-  // going away soon, and even decrementing once may be invalid.
-  if (PrevI != B)
-    PrevI = std::prev(PrevI);
-
-  DEBUG(dbgs() << "remove addr: " << *AddrMI);
-  BytesRemoved += TII->GetInstSizeInBytes(AddrMI);
-  AddrMI->eraseFromParent();
-
-  // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
-  // that gave us the initial base register definition.
-  for (; PrevI != B && !PrevI->definesRegister(NewBaseReg); --PrevI)
-    ;
-
-  // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
-  // to delete it as well.
-  MachineInstr *LeaMI = PrevI;
-  if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
-       LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
-      LeaMI->getOperand(0).getReg() != NewBaseReg)
-    return BytesRemoved;
-
-  DEBUG(dbgs() << "remove lea: " << *LeaMI);
-  BytesRemoved += TII->GetInstSizeInBytes(LeaMI);
-  LeaMI->eraseFromParent();
-  return BytesRemoved;
+  if (RemovableAdd) {
+    RemovableAdd->eraseFromParent();
+    DeadSize += 4;
+  } else if (BaseReg == EntryReg) {
+    // The add wasn't removable, but clobbered the base for the TBB. So we
+    // can't preserve it.
+    return false;
+  }
+
+  // We reached the end of the block without seeing another definition of
+  // BaseReg (except possibly the t2ADDrs, which was removed). BaseReg can be
+  // used in the TBB/TBH if necessary.
+  return true;
+}
+
+/// \brief Returns whether CPEMI is the first instruction in the block
+/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
+/// we can switch the first register to PC and usually remove the address
+/// calculation that preceded it.
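// Illustrative layout (a sketch, not taken from a real dump; the register and
// jump-table numbers are made up):
//
//   %bb.1:
//     ...
//     t2TBB_JT pc, r1, %jump-table.0, 1    <- JTMI, the block terminator
//   %bb.2:
//     JUMPTABLE_TBB %jump-table.0          <- CPEMI, first instruction
//
// With the table laid out immediately after the branch like this, the base
// operand of the TBB/TBH can simply be PC.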
+static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) { + MachineFunction::iterator MBB = JTMI->getParent(); + MachineFunction *MF = MBB->getParent(); + ++MBB; + + return MBB != MF->end() && MBB->begin() != MBB->end() && + &*MBB->begin() == CPEMI; } /// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller @@ -1955,37 +2106,79 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { break; } - if (ByteOk || HalfWordOk) { - MachineBasicBlock *MBB = MI->getParent(); - unsigned BaseReg = MI->getOperand(0).getReg(); - bool BaseRegKill = MI->getOperand(0).isKill(); - if (!BaseRegKill) - continue; - unsigned IdxReg = MI->getOperand(1).getReg(); - bool IdxRegKill = MI->getOperand(1).isKill(); + if (!ByteOk && !HalfWordOk) + continue; - DEBUG(dbgs() << "Shrink JT: " << *MI); - unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; - MachineBasicBlock::iterator MI_JT = MI; - MachineInstr *NewJTMI = - BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc)) - .addReg(IdxReg, getKillRegState(IdxRegKill)) - .addJumpTableIndex(JTI, JTOP.getTargetFlags()); - DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); - // FIXME: Insert an "ALIGN" instruction to ensure the next instruction - // is 2-byte aligned. For now, asm printer will fix it up. - unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); - unsigned OrigSize = TII->GetInstSizeInBytes(MI); - unsigned DeadSize = removeDeadDefinitions(MI, BaseReg, IdxReg); - MI->eraseFromParent(); + MachineBasicBlock *MBB = MI->getParent(); + if (!MI->getOperand(0).isKill()) // FIXME: needed now? + continue; + unsigned IdxReg = MI->getOperand(1).getReg(); + bool IdxRegKill = MI->getOperand(1).isKill(); - int delta = OrigSize - NewSize + DeadSize; - BBInfo[MBB->getNumber()].Size -= delta; - adjustBBOffsetsAfter(MBB); + CPUser &User = CPUsers[JumpTableUserIndices[JTI]]; + unsigned DeadSize = 0; + bool CanDeleteLEA = false; + bool BaseRegKill = false; + bool PreservedBaseReg = + preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill); - ++NumTBs; - MadeChange = true; + if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg) + continue; + + DEBUG(dbgs() << "Shrink JT: " << *MI); + MachineInstr *CPEMI = User.CPEMI; + unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; + MachineBasicBlock::iterator MI_JT = MI; + MachineInstr *NewJTMI = + BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc)) + .addReg(User.MI->getOperand(0).getReg(), + getKillRegState(BaseRegKill)) + .addReg(IdxReg, getKillRegState(IdxRegKill)) + .addJumpTableIndex(JTI, JTOP.getTargetFlags()) + .addImm(CPEMI->getOperand(0).getImm()); + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); + + unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH; + CPEMI->setDesc(TII->get(JTOpc)); + + if (jumpTableFollowsTB(MI, User.CPEMI)) { + NewJTMI->getOperand(0).setReg(ARM::PC); + NewJTMI->getOperand(0).setIsKill(false); + + if (CanDeleteLEA) { + User.MI->eraseFromParent(); + DeadSize += 4; + + // The LEA was eliminated, the TBB instruction becomes the only new user + // of the jump table. + User.MI = NewJTMI; + User.MaxDisp = 4; + User.NegOk = false; + User.IsSoImm = false; + User.KnownAlignment = false; + } else { + // The LEA couldn't be eliminated, so we must add another CPUser to + // record the TBB or TBH use. 
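        // (The matching CPEntry's RefCount is bumped just below so that the
        // jump-table island is not reclaimed while this extra user exists.)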
+ int CPEntryIdx = JumpTableEntryIndices[JTI]; + auto &CPEs = CPEntries[CPEntryIdx]; + auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) { + return E.CPEMI == User.CPEMI; + }); + ++Entry->RefCount; + CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false)); + } } + + unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); + unsigned OrigSize = TII->GetInstSizeInBytes(MI); + MI->eraseFromParent(); + + int Delta = OrigSize - NewSize + DeadSize; + BBInfo[MBB->getNumber()].Size -= Delta; + adjustBBOffsetsAfter(MBB); + + ++NumTBs; + MadeChange = true; } return MadeChange; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 4405625e47cd..50afb192b331 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "ARMBaseInstrInfo.h" #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -251,6 +252,9 @@ private: // Select special operations if node forms integer ABS pattern SDNode *SelectABSOp(SDNode *N); + SDNode *SelectReadRegister(SDNode *N); + SDNode *SelectWriteRegister(SDNode *N); + SDNode *SelectInlineAsm(SDNode *N); SDNode *SelectConcatVector(SDNode *N); @@ -2457,6 +2461,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::WRITE_REGISTER: { + SDNode *ResNode = SelectWriteRegister(N); + if (ResNode) + return ResNode; + break; + } + case ISD::READ_REGISTER: { + SDNode *ResNode = SelectReadRegister(N); + if (ResNode) + return ResNode; + break; + } case ISD::INLINEASM: { SDNode *ResNode = SelectInlineAsm(N); if (ResNode) @@ -3336,6 +3352,418 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } +// Inspect a register string of the form +// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or +// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string +// and obtain the integer operands from them, adding these operands to the +// provided vector. +static void getIntOperandsFromRegisterString(StringRef RegString, + SelectionDAG *CurDAG, SDLoc DL, + std::vector<SDValue>& Ops) { + SmallVector<StringRef, 5> Fields; + RegString.split(Fields, ":"); + + if (Fields.size() > 1) { + bool AllIntFields = true; + + for (StringRef Field : Fields) { + // Need to trim out leading 'cp' characters and get the integer field. + unsigned IntField; + AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); + Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); + } + + assert(AllIntFields && + "Unexpected non-integer value in special register string."); + } +} + +// Maps a Banked Register string to its mask value. The mask value returned is +// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register +// mask operand, which expresses which register is to be used, e.g. r8, and in +// which mode it is to be used, e.g. usr. Returns -1 to signify that the string +// was invalid. 
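// For example (illustrative; the IR and metadata node are made up), a read of
// the banked FIQ link register such as
//   %lr = call i32 @llvm.read_register.i32(metadata !0)  ; !0 = !{!"lr_fiq"}
// reaches this helper with RegString == "lr_fiq" and produces the mask 0x0e
// from the table below.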
+static inline int getBankedRegisterMask(StringRef RegString) { + return StringSwitch<int>(RegString.lower()) + .Case("r8_usr", 0x00) + .Case("r9_usr", 0x01) + .Case("r10_usr", 0x02) + .Case("r11_usr", 0x03) + .Case("r12_usr", 0x04) + .Case("sp_usr", 0x05) + .Case("lr_usr", 0x06) + .Case("r8_fiq", 0x08) + .Case("r9_fiq", 0x09) + .Case("r10_fiq", 0x0a) + .Case("r11_fiq", 0x0b) + .Case("r12_fiq", 0x0c) + .Case("sp_fiq", 0x0d) + .Case("lr_fiq", 0x0e) + .Case("lr_irq", 0x10) + .Case("sp_irq", 0x11) + .Case("lr_svc", 0x12) + .Case("sp_svc", 0x13) + .Case("lr_abt", 0x14) + .Case("sp_abt", 0x15) + .Case("lr_und", 0x16) + .Case("sp_und", 0x17) + .Case("lr_mon", 0x1c) + .Case("sp_mon", 0x1d) + .Case("elr_hyp", 0x1e) + .Case("sp_hyp", 0x1f) + .Case("spsr_fiq", 0x2e) + .Case("spsr_irq", 0x30) + .Case("spsr_svc", 0x32) + .Case("spsr_abt", 0x34) + .Case("spsr_und", 0x36) + .Case("spsr_mon", 0x3c) + .Case("spsr_hyp", 0x3e) + .Default(-1); +} + +// Maps a MClass special register string to its value for use in the +// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand. +// Returns -1 to signify that the string was invalid. +static inline int getMClassRegisterSYSmValueMask(StringRef RegString) { + return StringSwitch<int>(RegString.lower()) + .Case("apsr", 0x0) + .Case("iapsr", 0x1) + .Case("eapsr", 0x2) + .Case("xpsr", 0x3) + .Case("ipsr", 0x5) + .Case("epsr", 0x6) + .Case("iepsr", 0x7) + .Case("msp", 0x8) + .Case("psp", 0x9) + .Case("primask", 0x10) + .Case("basepri", 0x11) + .Case("basepri_max", 0x12) + .Case("faultmask", 0x13) + .Case("control", 0x14) + .Default(-1); +} + +// The flags here are common to those allowed for apsr in the A class cores and +// those allowed for the special registers in the M class cores. Returns a +// value representing which flags were present, -1 if invalid. +static inline int getMClassFlagsMask(StringRef Flags) { + if (Flags.empty()) + return 0x3; + + return StringSwitch<int>(Flags) + .Case("g", 0x1) + .Case("nzcvq", 0x2) + .Case("nzcvqg", 0x3) + .Default(-1); +} + +static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead, + const ARMSubtarget *Subtarget) { + // Ensure that the register (without flags) was a valid M Class special + // register. + int SYSmvalue = getMClassRegisterSYSmValueMask(Reg); + if (SYSmvalue == -1) + return -1; + + // basepri, basepri_max and faultmask are only valid for V7m. + if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13) + return -1; + + // If it was a read then we won't be expecting flags and so at this point + // we can return the mask. + if (IsRead) { + assert (Flags.empty() && "Unexpected flags for reading M class register."); + return SYSmvalue; + } + + // We know we are now handling a write so need to get the mask for the flags. + int Mask = getMClassFlagsMask(Flags); + + // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values + // shouldn't have flags present. + if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty())) + return -1; + + // The _g and _nzcvqg versions are only valid if the DSP extension is + // available. + if (!Subtarget->hasThumb2DSP() && (Mask & 0x2)) + return -1; + + // The register was valid so need to put the mask in the correct place + // (the flags need to be in bits 11-10) and combine with the SYSmvalue to + // construct the operand for the instruction node. 
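  // Worked example (illustrative, assuming a DSP-capable M-class core): a
  // write to "apsr_nzcvq" arrives here with SYSmvalue == 0x0 and Mask == 0x2,
  // so the returned operand is 0x0 | (0x2 << 10) == 0x800, with the flag bits
  // sitting in [11:10] above the 8-bit SYSm field.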
+ if (SYSmvalue < 0x4) + return SYSmvalue | Mask << 10; + + return SYSmvalue; +} + +static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { + // The mask operand contains the special register (R Bit) in bit 4, whether + // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and + // bits 3-0 contains the fields to be accessed in the special register, set by + // the flags provided with the register. + int Mask = 0; + if (Reg == "apsr") { + // The flags permitted for apsr are the same flags that are allowed in + // M class registers. We get the flag value and then shift the flags into + // the correct place to combine with the mask. + Mask = getMClassFlagsMask(Flags); + if (Mask == -1) + return -1; + return Mask << 2; + } + + if (Reg != "cpsr" && Reg != "spsr") { + return -1; + } + + // This is the same as if the flags were "fc" + if (Flags.empty() || Flags == "all") + return Mask | 0x9; + + // Inspect the supplied flags string and set the bits in the mask for + // the relevant and valid flags allowed for cpsr and spsr. + for (char Flag : Flags) { + int FlagVal; + switch (Flag) { + case 'c': + FlagVal = 0x1; + break; + case 'x': + FlagVal = 0x2; + break; + case 's': + FlagVal = 0x4; + break; + case 'f': + FlagVal = 0x8; + break; + default: + FlagVal = 0; + } + + // This avoids allowing strings where the same flag bit appears twice. + if (!FlagVal || (Mask & FlagVal)) + return -1; + Mask |= FlagVal; + } + + // If the register is spsr then we need to set the R bit. + if (Reg == "spsr") + Mask |= 0x10; + + return Mask; +} + +// Lower the read_register intrinsic to ARM specific DAG nodes +// using the supplied metadata string to select the instruction node to use +// and the registers/masks to construct as operands for the node. +SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){ + const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); + const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + bool IsThumb2 = Subtarget->isThumb2(); + SDLoc DL(N); + + std::vector<SDValue> Ops; + getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); + + if (!Ops.empty()) { + // If the special register string was constructed of fields (as defined + // in the ACLE) then need to lower to MRC node (32 bit) or + // MRRC node(64 bit), we can make the distinction based on the number of + // operands we have. + unsigned Opcode; + SmallVector<EVT, 3> ResTypes; + if (Ops.size() == 5){ + Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; + ResTypes.append({ MVT::i32, MVT::Other }); + } else { + assert(Ops.size() == 3 && + "Invalid number of fields in special register string."); + Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; + ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); + } + + Ops.push_back(getAL(CurDAG, DL)); + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); + Ops.push_back(N->getOperand(0)); + return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops); + } + + std::string SpecialReg = RegString->getString().lower(); + + int BankedReg = getBankedRegisterMask(SpecialReg); + if (BankedReg != -1) { + Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), + getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, + DL, MVT::i32, MVT::Other, Ops); + } + + // The VFP registers are read by creating SelectionDAG nodes with opcodes + // corresponding to the register that is being read from. 
So we switch on the + // string to find which opcode we need to use. + unsigned Opcode = StringSwitch<unsigned>(SpecialReg) + .Case("fpscr", ARM::VMRS) + .Case("fpexc", ARM::VMRS_FPEXC) + .Case("fpsid", ARM::VMRS_FPSID) + .Case("mvfr0", ARM::VMRS_MVFR0) + .Case("mvfr1", ARM::VMRS_MVFR1) + .Case("mvfr2", ARM::VMRS_MVFR2) + .Case("fpinst", ARM::VMRS_FPINST) + .Case("fpinst2", ARM::VMRS_FPINST2) + .Default(0); + + // If an opcode was found then we can lower the read to a VFP instruction. + if (Opcode) { + if (!Subtarget->hasVFP2()) + return nullptr; + if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8()) + return nullptr; + + Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops); + } + + // If the target is M Class then need to validate that the register string + // is an acceptable value, so check that a mask can be constructed from the + // string. + if (Subtarget->isMClass()) { + int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget); + if (SYSmValue == -1) + return nullptr; + + SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), + getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops); + } + + // Here we know the target is not M Class so we need to check if it is one + // of the remaining possible values which are apsr, cpsr or spsr. + if (SpecialReg == "apsr" || SpecialReg == "cpsr") { + Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL, + MVT::i32, MVT::Other, Ops); + } + + if (SpecialReg == "spsr") { + Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, + DL, MVT::i32, MVT::Other, Ops); + } + + return nullptr; +} + +// Lower the write_register intrinsic to ARM specific DAG nodes +// using the supplied metadata string to select the instruction node to use +// and the registers/masks to use in the nodes +SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){ + const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); + const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + bool IsThumb2 = Subtarget->isThumb2(); + SDLoc DL(N); + + std::vector<SDValue> Ops; + getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); + + if (!Ops.empty()) { + // If the special register string was constructed of fields (as defined + // in the ACLE) then need to lower to MCR node (32 bit) or + // MCRR node(64 bit), we can make the distinction based on the number of + // operands we have. + unsigned Opcode; + if (Ops.size() == 5) { + Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; + Ops.insert(Ops.begin()+2, N->getOperand(2)); + } else { + assert(Ops.size() == 3 && + "Invalid number of fields in special register string."); + Opcode = IsThumb2 ? 
ARM::t2MCRR : ARM::MCRR; + SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; + Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); + } + + Ops.push_back(getAL(CurDAG, DL)); + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); + Ops.push_back(N->getOperand(0)); + + return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); + } + + std::string SpecialReg = RegString->getString().lower(); + int BankedReg = getBankedRegisterMask(SpecialReg); + if (BankedReg != -1) { + Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), + getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, + DL, MVT::Other, Ops); + } + + // The VFP registers are written to by creating SelectionDAG nodes with + // opcodes corresponding to the register that is being written. So we switch + // on the string to find which opcode we need to use. + unsigned Opcode = StringSwitch<unsigned>(SpecialReg) + .Case("fpscr", ARM::VMSR) + .Case("fpexc", ARM::VMSR_FPEXC) + .Case("fpsid", ARM::VMSR_FPSID) + .Case("fpinst", ARM::VMSR_FPINST) + .Case("fpinst2", ARM::VMSR_FPINST2) + .Default(0); + + if (Opcode) { + if (!Subtarget->hasVFP2()) + return nullptr; + Ops = { N->getOperand(2), getAL(CurDAG, DL), + CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; + return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); + } + + SmallVector<StringRef, 5> Fields; + StringRef(SpecialReg).split(Fields, "_", 1, false); + std::string Reg = Fields[0].str(); + StringRef Flags = Fields.size() == 2 ? Fields[1] : ""; + + // If the target was M Class then need to validate the special register value + // and retrieve the mask for use in the instruction node. + if (Subtarget->isMClass()) { + // basepri_max gets split so need to correct Reg and Flags. + if (SpecialReg == "basepri_max") { + Reg = SpecialReg; + Flags = ""; + } + int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget); + if (SYSmValue == -1) + return nullptr; + + SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), + N->getOperand(2), getAL(CurDAG, DL), + CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; + return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops); + } + + // We then check to see if a valid mask can be constructed for one of the + // register string values permitted for the A and R class cores. These values + // are apsr, spsr and cpsr; these are also valid on older cores. + int Mask = getARClassRegisterMask(Reg, Flags); + if (Mask != -1) { + Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), + getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), + N->getOperand(0) }; + return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, + DL, MVT::Other, Ops); + } + + return nullptr; +} + SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ std::vector<SDValue> AsmNodeOperands; unsigned Flag, Kind; @@ -3492,13 +3920,29 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ bool ARMDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { - assert(ConstraintID == InlineAsm::Constraint_m && - "unexpected asm memory constraint"); - // Require the address to be in a register. That is safe for all ARM - // variants and it is hard to do anything much smarter without knowing - // how the operand is used. 
- OutOps.push_back(Op); - return false; + switch(ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_i: + // FIXME: It seems strange that 'i' is needed here since it's supposed to + // be an immediate and not a memory constraint. + // Fallthrough. + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_Q: + case InlineAsm::Constraint_Um: + case InlineAsm::Constraint_Un: + case InlineAsm::Constraint_Uq: + case InlineAsm::Constraint_Us: + case InlineAsm::Constraint_Ut: + case InlineAsm::Constraint_Uv: + case InlineAsm::Constraint_Uy: + // Require the address to be in a register. That is safe for all ARM + // variants and it is hard to do anything much smarter without knowing + // how the operand is used. + OutOps.push_back(Op); + return false; + } + return true; } /// createARMISelDag - This pass converts a legalized DAG into a diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 629cc90d67de..47c8400a668f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -426,6 +426,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ConstantFP, MVT::f32, Custom); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); + setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); + setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); + if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); @@ -2378,6 +2381,24 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { return !Subtarget->isThumb1Only(); } +// Trying to write a 64 bit value so need to split into two 32 bit values first, +// and pass the lower and high parts through. +static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + SDValue WriteValue = Op->getOperand(2); + + // This function is only supposed to be called for i64 type argument. + assert(WriteValue.getValueType() == MVT::i64 + && "LowerWRITE_REGISTER called for non-i64 type argument."); + + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, + DAG.getConstant(0, DL, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, + DAG.getConstant(1, DL, MVT::i32)); + SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi }; + return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops); +} + // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is // one of the above mentioned nodes. It has to be wrapped because otherwise @@ -4085,7 +4106,28 @@ unsigned ARMTargetLowering::getRegisterByName(const char* RegName, .Default(0); if (Reg) return Reg; - report_fatal_error("Invalid register name global variable"); + report_fatal_error(Twine("Invalid register name \"" + + StringRef(RegName) + "\".")); +} + +// Result is 64 bit value so split into two 32 bit values and return as a +// pair of values. +static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) { + SDLoc DL(N); + + // This function is only supposed to be called for i64 type destination. 
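// Illustrative example (the register string here is made up): a 64-bit read
// such as
//   %v = call i64 @llvm.read_register.i64(metadata !0) ; !0 = !{!"cp15:0:c2"}
// lands here; the node is re-issued with two i32 results (selected later as
// an MRRC-style read) and the halves are joined back into an i64 with
// ISD::BUILD_PAIR below.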
+ assert(N->getValueType(0) == MVT::i64 + && "ExpandREAD_REGISTER called for non-i64 type result."); + + SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL, + DAG.getVTList(MVT::i32, MVT::i32, MVT::Other), + N->getOperand(0), + N->getOperand(1)); + + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0), + Read.getValue(1))); + Results.push_back(Read.getOperand(0)); } /// ExpandBITCAST - If the target supports VFP, this function is called to @@ -6355,6 +6397,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); + case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: @@ -6439,6 +6482,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); + case ISD::READ_REGISTER: + ExpandREAD_REGISTER(N, Results, DAG); + break; case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; @@ -10222,7 +10268,8 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { EVT VT = getValueType(Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 63e87c5282d1..c0b329c5a1e5 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -286,7 +286,8 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -346,8 +347,31 @@ namespace llvm { unsigned getInlineAsmMemConstraint( const std::string &ConstraintCode) const override { - // FIXME: Map different constraints differently. 
- return InlineAsm::Constraint_m; + if (ConstraintCode == "Q") + return InlineAsm::Constraint_Q; + else if (ConstraintCode.size() == 2) { + if (ConstraintCode[0] == 'U') { + switch(ConstraintCode[1]) { + default: + break; + case 'm': + return InlineAsm::Constraint_Um; + case 'n': + return InlineAsm::Constraint_Un; + case 'q': + return InlineAsm::Constraint_Uq; + case 's': + return InlineAsm::Constraint_Us; + case 't': + return InlineAsm::Constraint_Ut; + case 'v': + return InlineAsm::Constraint_Uv; + case 'y': + return InlineAsm::Constraint_Uy; + } + } + } + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } const ARMSubtarget* getSubtarget() const { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 778fd17137f6..b8cac135baf6 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1826,6 +1826,32 @@ def CONSTPOOL_ENTRY : PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size), NoItinerary, []>; +/// A jumptable consisting of direct 32-bit addresses of the destination basic +/// blocks (either absolute, or relative to the start of the jump-table in PIC +/// mode). Used mostly in ARM and Thumb-1 modes. +def JUMPTABLE_ADDRS : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + +/// A jumptable consisting of 32-bit jump instructions. Used for Thumb-2 tables +/// that cannot be optimised to use TBB or TBH. +def JUMPTABLE_INSTS : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + +/// A jumptable consisting of 8-bit unsigned integers representing offsets from +/// a TBB instruction. +def JUMPTABLE_TBB : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + +/// A jumptable consisting of 16-bit unsigned integers representing offsets from +/// a TBH instruction. +def JUMPTABLE_TBH : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + + // FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE // from removing one half of the matched pairs. That breaks PEI, which assumes // these will always be in pairs, and asserts if it finds otherwise. Better way? 
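// Worked example (illustrative): a JUMPTABLE_TBB entry of 0x1f branches
// forward 2 * 0x1f = 62 bytes, so 8-bit entries reach targets up to 510 bytes
// past the table; JUMPTABLE_TBH entries are 16 bits wide and extend that
// reach to roughly 128 KiB.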
@@ -2224,7 +2250,7 @@ let isBranch = 1, isTerminator = 1 in { [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>, Sched<[WriteBr]>; - let isNotDuplicable = 1, isIndirectBranch = 1 in { + let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : ARMPseudoInst<(outs), (ins GPR:$target, i32imm:$jt), 0, IIC_Br, @@ -5039,10 +5065,11 @@ def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; -class MovRRCopro<string opc, bit direction, list<dag> pattern = []> - : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, - GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), - NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { +class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag> + pattern = []> + : ABI<0b1100, oops, iops, NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", + pattern> { + let Inst{23-21} = 0b010; let Inst{20} = direction; @@ -5060,9 +5087,13 @@ class MovRRCopro<string opc, bit direction, list<dag> pattern = []> } def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, c_imm:$CRm), [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, imm:$CRm)]>; -def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; +def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */, + (outs GPRnopc:$Rt, GPRnopc:$Rt2), + (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>; class MovRRCopro2<string opc, bit direction, list<dag> pattern = []> : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 0fecfa1319d3..40414da3ca81 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -526,6 +526,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { 0, IIC_Br, [(ARMbrjt tGPR:$target, tjumptable:$jt)]>, Sched<[WriteBrTbl]> { + let Size = 2; list<Predicate> Predicates = [IsThumb, IsThumb1Only]; } } diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 814b524b2bcb..aba8a7b10fd9 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3531,20 +3531,20 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let AsmMatchConverter = "cvtThumbBranches"; } -let isNotDuplicable = 1, isIndirectBranch = 1 in { +let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : t2PseudoInst<(outs), (ins GPR:$target, GPR:$index, i32imm:$jt), 0, IIC_Br, [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt)]>, Sched<[WriteBr]>; -// FIXME: Add a non-pc based case that can be predicated. +// FIXME: Add a case that can be predicated. 
 def t2TBB_JT : t2PseudoInst<(outs),
-       (ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>,
+       (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
        Sched<[WriteBr]>;
 
 def t2TBH_JT : t2PseudoInst<(outs),
-       (ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>,
+       (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
        Sched<[WriteBr]>;
 
 def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
@@ -4141,11 +4141,9 @@ class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
   let Inst{19-16} = CRn;
 }
 
-class t2MovRRCopro<bits<4> Op, string opc, bit direction,
+class t2MovRRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
                    list<dag> pattern = []>
-  : T2Cop<Op, (outs),
-          (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
-          opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
+  : T2Cop<Op, oops, iops, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
   let Inst{27-24} = 0b1100;
   let Inst{23-21} = 0b010;
   let Inst{20} = direction;
@@ -4210,19 +4208,25 @@ def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
 
 /* from ARM core register to coprocessor */
-def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0,
+def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs),
+                          (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
+                           c_imm:$CRm),
                           [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt,
                                          GPR:$Rt2, imm:$CRm)]>;
-def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0,
-                           [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
-                                           GPR:$Rt2, imm:$CRm)]> {
+def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs),
+                           (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
+                            c_imm:$CRm),
+                           [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
+                                           GPR:$Rt2, imm:$CRm)]> {
   let Predicates = [IsThumb2, PreV8];
 }
 
 /* from coprocessor to ARM core register */
-def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>;
+def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1, (outs GPR:$Rt, GPR:$Rt2),
+                          (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm)>;
 
-def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1> {
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1, (outs GPR:$Rt, GPR:$Rt2),
+                           (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm)> {
   let Predicates = [IsThumb2, PreV8];
 }
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 5b62a21706ce..46ff326ba630 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains a pass that performs load / store related peephole
-// optimizations. This pass should be run after register allocation.
+/// \file This file contains a pass that performs load / store related peephole
+/// optimizations. This pass should be run after register allocation.
 //
 //===----------------------------------------------------------------------===//
 
@@ -58,10 +58,9 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
 
-/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
-/// load / store instructions to form ldm / stm instructions.
-
 namespace {
+  /// Post-register-allocation pass that combines load / store instructions
+  /// to form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass { static char ID; ARMLoadStoreOpt() : MachineFunctionPass(ID) {} @@ -271,10 +270,7 @@ static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) { } } -namespace llvm { - namespace ARM_AM { - -AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { +static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unhandled opcode!"); case ARM::LDMIA_RET: @@ -328,9 +324,6 @@ AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) { } } - } // end namespace ARM_AM -} // end namespace llvm - static bool isT1i32Load(unsigned Opc) { return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; } @@ -469,9 +462,9 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, } } -/// MergeOps - Create and insert a LDM or STM with Base as base register and -/// registers in Regs as the register operands that would be loaded / stored. -/// It returns true if the transformation is done. +/// Create and insert a LDM or STM with Base as base register and registers in +/// Regs as the register operands that would be loaded / stored. It returns +/// true if the transformation is done. bool ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -665,7 +658,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return true; } -/// \brief Find all instructions using a given imp-def within a range. +/// Find all instructions using a given imp-def within a range. /// /// We are trying to combine a range of instructions, one of which (located at /// position RangeBegin) implicitly defines a register. The final LDM/STM will @@ -721,8 +714,7 @@ void ARMLoadStoreOpt::findUsesOfImpDef( } } -// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on -// success. +/// Call MergeOps and update MemOps and merges accordingly on success. void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, MemOpQueue &memOps, unsigned memOpsBegin, unsigned memOpsEnd, @@ -762,10 +754,10 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, Regs.push_back(std::make_pair(Reg, isKill)); // Collect any implicit defs of super-registers. They must be preserved. - for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) { - if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead()) + for (const MachineOperand &MO : memOps[i].MBBI->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead()) continue; - unsigned DefReg = MO->getReg(); + unsigned DefReg = MO.getReg(); if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) ImpDefs.push_back(DefReg); @@ -823,8 +815,8 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, } } -/// MergeLDR_STR - Merge a number of load / store instructions into one or more -/// load / store multiple instructions. +/// Merge a number of load / store instructions into one or more load / store +/// multiple instructions. 
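// For example (an illustrative sketch), three loads off the same base
//   ldr r0, [r4]
//   ldr r1, [r4, #4]
//   ldr r2, [r4, #8]
// can be merged into the single multiple-load
//   ldmia r4, {r0, r1, r2}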
void ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, unsigned Opcode, unsigned Size, @@ -1083,8 +1075,8 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } } -/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base -/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: +/// Fold proceeding/trailing inc/dec of base register into the +/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: /// /// stmia rn, <ra, rb, rc> /// rn := rn + 4 * 3; @@ -1118,7 +1110,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, return false; bool DoMerge = false; - ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); + ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); // Try merging with the previous instruction. MachineBasicBlock::iterator BeginMBBI = MBB.begin(); @@ -1231,8 +1223,8 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, } } -/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base -/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: +/// Fold proceeding/trailing inc/dec of base register into the +/// LDR/STR/FLD{D|S}/FST{D|S} op when possible: bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const TargetInstrInfo *TII, @@ -1373,8 +1365,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return true; } -/// isMemoryOp - Returns true if instruction is a memory operation that this -/// pass is capable of operating on. +/// Returns true if instruction is a memory operation that this pass is capable +/// of operating on. static bool isMemoryOp(const MachineInstr *MI) { // When no memory operands are present, conservatively assume unaligned, // volatile, unfoldable. @@ -1428,8 +1420,8 @@ static bool isMemoryOp(const MachineInstr *MI) { return false; } -/// AdvanceRS - Advance register scavenger to just before the earliest memory -/// op that is being merged. +/// Advance register scavenger to just before the earliest memory op that is +/// being merged. void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { MachineBasicBlock::iterator Loc = MemOps[0].MBBI; unsigned Position = MemOps[0].Position; @@ -1472,8 +1464,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr *MI = &*MBBI; unsigned Opcode = MI->getOpcode(); - if (Opcode == ARM::LDRD || Opcode == ARM::STRD || - Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) { + if (Opcode == ARM::LDRD || Opcode == ARM::STRD) { const MachineOperand &BaseOp = MI->getOperand(2); unsigned BaseReg = BaseOp.getReg(); unsigned EvenReg = MI->getOperand(0).getReg(); @@ -1588,8 +1579,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, return false; } -/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR -/// ops of the same base and incrementing offset into LDM / STM ops. +/// An optimization pass to turn multiple LDR / STR ops of the same base and +/// incrementing offset into LDM / STM ops. 
 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   unsigned NumMerges = 0;
   unsigned NumMemOps = 0;
@@ -1770,9 +1761,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   return NumMerges > 0;
 }
 
-/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
-/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
-/// directly restore the value of LR into pc.
+/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc,
+/// lr") into the preceding stack restore so it directly restores the value
+/// of LR into pc.
 ///   ldmfd sp!, {..., lr}
 ///   bx lr
 /// or
@@ -1834,12 +1825,9 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   return Modified;
 }
 
-
-/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
-/// load / stores from consecutive locations close to make it more
-/// likely they will be combined later.
-
 namespace {
+  /// Pre-register-allocation pass that moves loads / stores from consecutive
+  /// locations closer together to make it more likely they will be combined
+  /// later.
   struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
     static char ID;
     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
@@ -1936,7 +1924,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
 }
 
 
-/// Copy Op0 and Op1 operands into a new array assigned to MI.
+/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                    MachineInstr *Op1) {
   assert(MI->memoperands_empty() && "expected a new machineinstr");
@@ -1954,10 +1942,11 @@ static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
 
 bool
 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
-                                          DebugLoc &dl,
-                                          unsigned &NewOpc, unsigned &EvenReg,
-                                          unsigned &OddReg, unsigned &BaseReg,
-                                          int &Offset, unsigned &PredReg,
+                                          DebugLoc &dl, unsigned &NewOpc,
+                                          unsigned &FirstReg,
+                                          unsigned &SecondReg,
+                                          unsigned &BaseReg, int &Offset,
+                                          unsigned &PredReg,
                                           ARMCC::CondCodes &Pred, bool &isT2) {
   // Make sure we're allowed to generate LDRD/STRD.
@@ -2016,9 +2005,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
       return false;
     Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
   }
-  EvenReg = Op0->getOperand(0).getReg();
-  OddReg = Op1->getOperand(0).getReg();
-  if (EvenReg == OddReg)
+  FirstReg = Op0->getOperand(0).getReg();
+  SecondReg = Op1->getOperand(0).getReg();
+  if (FirstReg == SecondReg)
     return false;
   BaseReg = Op0->getOperand(1).getReg();
   Pred = getInstrPredicate(Op0, PredReg);
@@ -2114,7 +2103,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
   // to try to allocate a pair of registers that can form register pairs.
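  // (Illustrative note: in ARM mode LDRD/STRD require an even/odd pair such
  // as r0/r1, which is what the RegPairEven/RegPairOdd hints below ask the
  // allocator for; Thumb-2 LDRD/STRD have no such pairing restriction, hence
  // the !isT2 guard.)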
MachineInstr *Op0 = Ops.back(); MachineInstr *Op1 = Ops[Ops.size()-2]; - unsigned EvenReg = 0, OddReg = 0; + unsigned FirstReg = 0, SecondReg = 0; unsigned BaseReg = 0, PredReg = 0; ARMCC::CondCodes Pred = ARMCC::AL; bool isT2 = false; @@ -2122,21 +2111,21 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, int Offset = 0; DebugLoc dl; if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, - EvenReg, OddReg, BaseReg, + FirstReg, SecondReg, BaseReg, Offset, PredReg, Pred, isT2)) { Ops.pop_back(); Ops.pop_back(); const MCInstrDesc &MCID = TII->get(NewOpc); const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); - MRI->constrainRegClass(EvenReg, TRC); - MRI->constrainRegClass(OddReg, TRC); + MRI->constrainRegClass(FirstReg, TRC); + MRI->constrainRegClass(SecondReg, TRC); // Form the pair instruction. if (isLd) { MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) - .addReg(EvenReg, RegState::Define) - .addReg(OddReg, RegState::Define) + .addReg(FirstReg, RegState::Define) + .addReg(SecondReg, RegState::Define) .addReg(BaseReg); // FIXME: We're converting from LDRi12 to an insn that still // uses addrmode2, so we need an explicit offset reg. It should @@ -2149,8 +2138,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, ++NumLDRDFormed; } else { MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) - .addReg(EvenReg) - .addReg(OddReg) + .addReg(FirstReg) + .addReg(SecondReg) .addReg(BaseReg); // FIXME: We're converting from LDRi12 to an insn that still // uses addrmode2, so we need an explicit offset reg. It should @@ -2165,9 +2154,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, MBB->erase(Op0); MBB->erase(Op1); - // Add register allocation hints to form register pairs. - MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); - MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); + if (!isT2) { + // Add register allocation hints to form register pairs. + MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg); + MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg); + } } else { for (unsigned i = 0; i != NumMove; ++i) { MachineInstr *Op = Ops.back(); @@ -2292,8 +2283,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { } -/// createARMLoadStoreOptimizationPass - returns an instance of the load / store -/// optimization pass. +/// Returns an instance of the load / store optimization pass. 
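// Typical use (a sketch; the flag selects between the two passes above):
//   addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));   // pre-RA
//   addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/false));  // post-RA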
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { if (PreAlloc) return new ARMPreAllocLoadStoreOpt(); diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index e370b962ba7f..a2aca2d1a69e 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -30,35 +30,35 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, unsigned Option = MO.getTargetFlags() & ARMII::MO_OPTION_MASK; switch (Option) { default: { - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, + Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); switch (Option) { default: llvm_unreachable("Unknown target flag on symbol operand"); case ARMII::MO_NO_FLAG: break; case ARMII::MO_LO16: - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, + Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); - Expr = ARMMCExpr::CreateLower16(Expr, OutContext); + Expr = ARMMCExpr::createLower16(Expr, OutContext); break; case ARMII::MO_HI16: - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, + Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); - Expr = ARMMCExpr::CreateUpper16(Expr, OutContext); + Expr = ARMMCExpr::createUpper16(Expr, OutContext); break; } break; } case ARMII::MO_PLT: - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_PLT, + Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_PLT, OutContext); break; } if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(MO.getOffset(), + Expr = MCBinaryExpr::createAdd(Expr, + MCConstantExpr::create(MO.getOffset(), OutContext), OutContext); return MCOperand::createExpr(Expr); @@ -80,7 +80,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create( + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), OutContext)); break; case MachineOperand::MO_GlobalAddress: { diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index e794fb71af63..0aceaed87510 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -304,10 +304,6 @@ public: return getTM<ARMBaseTargetMachine>(); } - const ARMSubtarget &getARMSubtarget() const { - return *getARMTargetMachine().getSubtargetImpl(); - } - void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; @@ -330,24 +326,28 @@ void ARMPassConfig::addIRPasses() { // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. 
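// (Sketch of the idiom introduced below: rather than querying one global
// subtarget here, each pass now receives a per-function predicate, e.g.
//   addPass(createCFGSimplificationPass(-1, [this](const Function &F) {
//     const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
//     return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
//   }));
// so the decision tracks the function being compiled.)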
- const ARMSubtarget *Subtarget = &getARMSubtarget(); - if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) - if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass()); + if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) + addPass(createCFGSimplificationPass(-1, [this](const Function &F) { + const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F); + return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); + })); TargetPassConfig::addIRPasses(); } bool ARMPassConfig::addPreISel() { - if ((TM->getOptLevel() == CodeGenOpt::Aggressive && + if ((TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge == cl::BOU_UNSET) || - EnableGlobalMerge == cl::BOU_TRUE) + EnableGlobalMerge == cl::BOU_TRUE) { // FIXME: This is using the thumb1 only constant value for // maximal global offset for merging globals. We may want // to look into using the old value for non-thumb1 code of // 4095 based on the TargetMachine, but this starts to become // tricky when doing code gen per function. - addPass(createGlobalMergePass(TM, 127)); + bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) && + (EnableGlobalMerge == cl::BOU_UNSET); + addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize)); + } return false; } @@ -387,10 +387,13 @@ void ARMPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) { // in v8, IfConversion depends on Thumb instruction widths - if (getARMSubtarget().restrictIT()) - addPass(createThumb2SizeReductionPass()); - if (!getARMSubtarget().isThumb1Only()) - addPass(&IfConverterID); + addPass(createThumb2SizeReductionPass([this](const Function &F) { + return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT(); + })); + + addPass(createIfConverter([this](const Function &F) { + return !this->TM->getSubtarget<ARMSubtarget>(F).isThumb1Only(); + })); } addPass(createThumb2ITBlockPass()); } @@ -399,8 +402,9 @@ void ARMPassConfig::addPreEmitPass() { addPass(createThumb2SizeReductionPass()); // Constant island pass work on unbundled instructions. - if (getARMSubtarget().isThumb2()) - addPass(&UnpackMachineBundlesID); + addPass(createUnpackMachineBundles([this](const Function &F) { + return this->TM->getSubtarget<ARMSubtarget>(F).isThumb2(); + })); // Don't optimize barriers at -O0. 
if (getOptLevel() != CodeGenOpt::None) diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 80f03c62bbfb..eaed5cc68750 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -50,12 +50,12 @@ const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference( assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only"); - return MCSymbolRefExpr::Create(TM.getSymbol(GV, Mang), + return MCSymbolRefExpr::create(TM.getSymbol(GV, Mang), MCSymbolRefExpr::VK_ARM_TARGET2, getContext()); } const MCExpr *ARMElfTargetObjectFile:: getDebugThreadLocalSymbol(const MCSymbol *Sym) const { - return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO, + return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO, getContext()); } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 30c7d62e84b8..8bcbb1159f81 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1051,7 +1051,7 @@ public: if (!CE) return false; int64_t Value = CE->getValue(); return (ARM_AM::getSOImmVal(Value) != -1 || - ARM_AM::getSOImmVal(-Value) != -1);; + ARM_AM::getSOImmVal(-Value) != -1); } bool isT2SOImm() const { if (!isImm()) return false; @@ -4252,7 +4252,7 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) { Error(S, "'be' or 'le' operand expected"); return MatchOperand_ParseFail; } - Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val, + Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::create(Val, getContext()), S, Tok.getEndLoc())); return MatchOperand_Success; @@ -4656,7 +4656,7 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) { Val = INT32_MIN; Operands.push_back( - ARMOperand::CreateImm(MCConstantExpr::Create(Val, getContext()), S, E)); + ARMOperand::CreateImm(MCConstantExpr::create(Val, getContext()), S, E)); return MatchOperand_Success; } @@ -4886,7 +4886,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { // If the constant was #-0, represent it as INT32_MIN. int32_t Val = CE->getValue(); if (isNegative && Val == 0) - CE = MCConstantExpr::Create(INT32_MIN, getContext()); + CE = MCConstantExpr::create(INT32_MIN, getContext()); // Now we should have the closing ']' if (Parser.getTok().isNot(AsmToken::RBrac)) @@ -5073,7 +5073,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) { IntVal ^= (uint64_t)isNegative << 31; Parser.Lex(); // Eat the token. 
Operands.push_back(ARMOperand::CreateImm( - MCConstantExpr::Create(IntVal, getContext()), + MCConstantExpr::create(IntVal, getContext()), S, Parser.getTok().getLoc())); return MatchOperand_Success; } @@ -5090,7 +5090,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) { Val = APFloat(RealVal).bitcastToAPInt().getZExtValue(); Operands.push_back(ARMOperand::CreateImm( - MCConstantExpr::Create(Val, getContext()), S, + MCConstantExpr::create(Val, getContext()), S, Parser.getTok().getLoc())); return MatchOperand_Success; } @@ -5179,7 +5179,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { if (CE) { int32_t Val = CE->getValue(); if (isNegative && Val == 0) - ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); + ImmVal = MCConstantExpr::create(INT32_MIN, getContext()); } E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); @@ -5209,7 +5209,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { if (getParser().parseExpression(SubExprVal)) return true; - const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal, + const MCExpr *ExprVal = ARMMCExpr::create(RefKind, SubExprVal, getContext()); E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E)); @@ -5765,7 +5765,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // Add the processor imod operand, if necessary. if (ProcessorIMod) { Operands.push_back(ARMOperand::CreateImm( - MCConstantExpr::Create(ProcessorIMod, getContext()), + MCConstantExpr::create(ProcessorIMod, getContext()), NameLoc, NameLoc)); } else if (Mnemonic == "cps" && isMClass()) { return Error(NameLoc, "instruction 'cps' requires effect for M-class"); @@ -6752,13 +6752,13 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, MCSymbol *Dot = getContext().createTempSymbol(); Out.EmitLabel(Dot); const MCExpr *OpExpr = Inst.getOperand(2).getExpr(); - const MCExpr *InstPC = MCSymbolRefExpr::Create(Dot, + const MCExpr *InstPC = MCSymbolRefExpr::create(Dot, MCSymbolRefExpr::VK_None, getContext()); - const MCExpr *Const8 = MCConstantExpr::Create(8, getContext()); - const MCExpr *ReadPC = MCBinaryExpr::CreateAdd(InstPC, Const8, + const MCExpr *Const8 = MCConstantExpr::create(8, getContext()); + const MCExpr *ReadPC = MCBinaryExpr::createAdd(InstPC, Const8, getContext()); - const MCExpr *FixupAddr = MCBinaryExpr::CreateAdd(ReadPC, OpExpr, + const MCExpr *FixupAddr = MCBinaryExpr::createAdd(ReadPC, OpExpr, getContext()); TmpInst.addOperand(MCOperand::createExpr(FixupAddr)); } @@ -9168,74 +9168,19 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { StringRef CPU = getParser().parseStringToEndOfStatement().trim(); getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU); + // FIXME: This is using table-gen data, but should be moved to + // ARMTargetParser once that is table-gen'd. if (!STI.isCPUStringValid(CPU)) { Error(L, "Unknown CPU name"); return false; } - // FIXME: This switches the CPU features globally, therefore it might - // happen that code you would not expect to assemble will. 
For details - // see: http://llvm.org/bugs/show_bug.cgi?id=20757 STI.InitMCProcessorInfo(CPU, ""); STI.InitCPUSchedModel(CPU); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); return false; } - -// FIXME: This is duplicated in getARMFPUFeatures() in -// tools/clang/lib/Driver/Tools.cpp -static const struct { - const unsigned ID; - const FeatureBitset Enabled; - const FeatureBitset Disabled; -} FPUs[] = { - {/* ID */ ARM::FK_VFP, - /* Enabled */ {ARM::FeatureVFP2}, - /* Disabled */ {ARM::FeatureNEON}}, - {/* ID */ ARM::FK_VFPV2, - /* Enabled */ {ARM::FeatureVFP2}, - /* Disabled */ {ARM::FeatureNEON}}, - {/* ID */ ARM::FK_VFPV3, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3}, - /* Disabled */ {ARM::FeatureNEON, ARM::FeatureD16}}, - {/* ID */ ARM::FK_VFPV3_D16, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureD16}, - /* Disabled */ {ARM::FeatureNEON}}, - {/* ID */ ARM::FK_VFPV4, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4}, - /* Disabled */ {ARM::FeatureNEON, ARM::FeatureD16}}, - {/* ID */ ARM::FK_VFPV4_D16, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4, - ARM::FeatureD16}, - /* Disabled */ {ARM::FeatureNEON}}, - {/* ID */ ARM::FK_FPV5_D16, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4, - ARM::FeatureFPARMv8, ARM::FeatureD16}, - /* Disabled */ {ARM::FeatureNEON, ARM::FeatureCrypto}}, - {/* ID */ ARM::FK_FP_ARMV8, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4, - ARM::FeatureFPARMv8}, - /* Disabled */ {ARM::FeatureNEON, ARM::FeatureCrypto, ARM::FeatureD16}}, - {/* ID */ ARM::FK_NEON, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON}, - /* Disabled */ {ARM::FeatureD16}}, - {/* ID */ ARM::FK_NEON_VFPV4, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4, - ARM::FeatureNEON}, - /* Disabled */ {ARM::FeatureD16}}, - {/* ID */ ARM::FK_NEON_FP_ARMV8, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4, - ARM::FeatureFPARMv8, ARM::FeatureNEON}, - /* Disabled */ {ARM::FeatureCrypto, ARM::FeatureD16}}, - {/* ID */ ARM::FK_CRYPTO_NEON_FP_ARMV8, - /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4, - ARM::FeatureFPARMv8, ARM::FeatureNEON, - ARM::FeatureCrypto}, - /* Disabled */ {ARM::FeatureD16}}, - {ARM::FK_SOFTVFP, {}, {}}, -}; - /// parseDirectiveFPU /// ::= .fpu str bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { @@ -9243,23 +9188,15 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { StringRef FPU = getParser().parseStringToEndOfStatement().trim(); unsigned ID = ARMTargetParser::parseFPU(FPU); - - if (ID == ARM::FK_INVALID) { + std::vector<const char *> Features; + if (!ARMTargetParser::getFPUFeatures(ID, Features)) { Error(FPUNameLoc, "Unknown FPU name"); return false; } - for (const auto &Entry : FPUs) { - if (Entry.ID != ID) - continue; - - // Need to toggle features that should be on but are off and that - // should off but are on. 
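The per-FPU loop being deleted just below rested on one bitset identity worth spelling out: the bits that must flip are the wanted-but-off features plus the unwanted-but-on ones. A minimal sketch, with std::bitset standing in for llvm::FeatureBitset:

    #include <bitset>

    using FeatureBits = std::bitset<64>; // stand-in for llvm::FeatureBitset

    // Toggle mask from the deleted loop below:
    //   (Enabled & ~Current) | (Disabled & Current)
    static FeatureBits computeToggle(const FeatureBits &Enabled,
                                     const FeatureBits &Disabled,
                                     const FeatureBits &Current) {
      return (Enabled & ~Current) | (Disabled & Current);
    }

The replacement code instead applies named features through ApplyFeatureFlag and recomputes availability, which avoids maintaining the per-FPU enable/disable tables at all.
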
- FeatureBitset Toggle = (Entry.Enabled & ~STI.getFeatureBits()) | - (Entry.Disabled & STI.getFeatureBits()); - setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle))); - break; - } + for (auto Feature : Features) + STI.ApplyFeatureFlag(Feature); + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); getTargetStreamer().emitFPU(ID); return false; @@ -9804,7 +9741,7 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { } const MCSymbolRefExpr *SRE = - MCSymbolRefExpr::Create(Parser.getTok().getIdentifier(), + MCSymbolRefExpr::create(Parser.getTok().getIdentifier(), MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext()); Lex(); @@ -9982,33 +9919,32 @@ extern "C" void LLVMInitializeARMAsmParser() { #define GET_MATCHER_IMPLEMENTATION #include "ARMGenAsmMatcher.inc" +// FIXME: This structure should be moved inside ARMTargetParser +// when we start to table-generate them, and we can use the ARM +// flags below, that were generated by table-gen. static const struct { - const char *Name; + const ARM::ArchExtKind Kind; const unsigned ArchCheck; const FeatureBitset Features; } Extensions[] = { - { "crc", Feature_HasV8, {ARM::FeatureCRC} }, - { "crypto", Feature_HasV8, + { ARM::AEK_CRC, Feature_HasV8, {ARM::FeatureCRC} }, + { ARM::AEK_CRYPTO, Feature_HasV8, {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} }, - { "fp", Feature_HasV8, {ARM::FeatureFPARMv8} }, - { "idiv", Feature_HasV7 | Feature_IsNotMClass, + { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} }, + { ARM::AEK_HWDIV, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} }, - // FIXME: iWMMXT not supported - { "iwmmxt", Feature_None, {} }, - // FIXME: iWMMXT2 not supported - { "iwmmxt2", Feature_None, {} }, - // FIXME: Maverick not supported - { "maverick", Feature_None, {} }, - { "mp", Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} }, - // FIXME: ARMv6-m OS Extensions feature not checked - { "os", Feature_None, {} }, + { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} }, + { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} }, // FIXME: Also available in ARMv6-K - { "sec", Feature_HasV7, {ARM::FeatureTrustZone} }, - { "simd", Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} }, + { ARM::AEK_SEC, Feature_HasV7, {ARM::FeatureTrustZone} }, // FIXME: Only available in A-class, isel not predicated - { "virt", Feature_HasV7, {ARM::FeatureVirtualization} }, - // FIXME: xscale not supported - { "xscale", Feature_None, {} }, + { ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} }, + // FIXME: Unsupported extensions. 
+ { ARM::AEK_OS, Feature_None, {} }, + { ARM::AEK_IWMMXT, Feature_None, {} }, + { ARM::AEK_IWMMXT2, Feature_None, {} }, + { ARM::AEK_MAVERICK, Feature_None, {} }, + { ARM::AEK_XSCALE, Feature_None, {} }, }; /// parseDirectiveArchExtension @@ -10031,9 +9967,12 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { EnableFeature = false; Name = Name.substr(2); } + unsigned FeatureKind = ARMTargetParser::parseArchExt(Name); + if (FeatureKind == ARM::AEK_INVALID) + Error(ExtLoc, "unknown architectural extension: " + Name); for (const auto &Extension : Extensions) { - if (Extension.Name != Name) + if (Extension.Kind != FeatureKind) continue; if (Extension.Features.none()) @@ -10080,7 +10019,7 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, if (Op.isImm()) { const MCExpr *SOExpr = Op.getImm(); int64_t Value; - if (!SOExpr->EvaluateAsAbsolute(Value)) + if (!SOExpr->evaluateAsAbsolute(Value)) return Match_Success; assert((Value >= INT32_MIN && Value <= UINT32_MAX) && "expression value must be representable in 32 bits"); diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 2d36c3020016..0bff52141da5 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -329,7 +329,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCExpr *Expr = Op.getExpr(); switch (Expr->getKind()) { case MCExpr::Binary: - O << '#' << *Expr; + O << '#'; + Expr->print(O, &MAI); break; case MCExpr::Constant: { // If a symbolic branch target was added as a constant expression then @@ -337,8 +338,9 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, // address. const MCConstantExpr *Constant = cast<MCConstantExpr>(Expr); int64_t TargetAddress; - if (!Constant->EvaluateAsAbsolute(TargetAddress)) { - O << '#' << *Expr; + if (!Constant->evaluateAsAbsolute(TargetAddress)) { + O << '#'; + Expr->print(O, &MAI); } else { O << "0x"; O.write_hex(static_cast<uint32_t>(TargetAddress)); @@ -348,7 +350,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, default: // FIXME: Should we always treat this as if it is a constant literal and // prefix it with '#'? 
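Throughout the InstPrinter hunks that follow, expression operands stop going through MCExpr's streaming operator and are printed with an explicit MCAsmInfo, since MCExpr::print now takes the assembler info as a parameter. The new call pattern in isolation (a hypothetical free function, shown only to isolate the idiom):

    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/MC/MCExpr.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Print an immediate expression operand the post-change way: stream the
    // '#' prefix, then let the expression print itself against MAI.
    static void printImmExpr(const MCExpr *Expr, raw_ostream &O,
                             const MCAsmInfo &MAI) {
      O << '#';
      Expr->print(O, &MAI);
    }
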
- O << *Expr; + Expr->print(O, &MAI); break; } } @@ -359,7 +361,7 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); if (MO1.isExpr()) { - O << *MO1.getExpr(); + MO1.getExpr()->print(O, &MAI); return; } @@ -1055,7 +1057,7 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, const MCOperand &MO = MI->getOperand(OpNum); if (MO.isExpr()) { - O << *MO.getExpr(); + MO.getExpr()->print(O, &MAI); return; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index f0eed9b811d4..b03cada9a641 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -622,8 +622,6 @@ namespace ARM_AM { return Value; } - AMSubMode getLoadStoreMultipleSubMode(int Opcode); - //===--------------------------------------------------------------------===// // Floating-point Immediates // diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 6c1f7891f58a..be23e9070103 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -260,9 +260,9 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { hasNOP() ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding; uint64_t NumNops = Count / 2; for (uint64_t i = 0; i != NumNops; ++i) - OW->Write16(nopEncoding); + OW->write16(nopEncoding); if (Count & 1) - OW->Write8(0); + OW->write8(0); return true; } // ARM mode @@ -270,21 +270,21 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding; uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(nopEncoding); + OW->write32(nopEncoding); // FIXME: should this function return false when unable to write exactly // 'Count' bytes with NOP encodings? switch (Count % 4) { default: break; // No leftover bytes to write case 1: - OW->Write8(0); + OW->write8(0); break; case 2: - OW->Write16(0); + OW->write16(0); break; case 3: - OW->Write16(0); - OW->Write8(0xa0); + OW->write16(0); + OW->write8(0xa0); break; } @@ -601,8 +601,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // the offset when the destination has the same MCFragment. 
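The writeNopData hunk above only renames Write32/Write16/Write8 to their lower-case forms; the padding arithmetic is unchanged and worth noting: ARM mode pads in 4-byte NOP units and fills a 1-3 byte tail with zeros, ending a 3-byte tail with 0xa0. A self-contained sketch of that arithmetic over a plain byte buffer, little-endian for illustration:

    #include <cstdint>
    #include <vector>

    // ARM-mode NOP padding as in writeNopData above, into a byte vector
    // instead of an MCObjectWriter.
    static void padWithARMNops(std::vector<uint8_t> &Out, uint64_t Count,
                               uint32_t NopEncoding) {
      for (uint64_t I = 0, E = Count / 4; I != E; ++I)
        for (int B = 0; B < 4; ++B)
          Out.push_back((NopEncoding >> (8 * B)) & 0xff); // write32, LE
      switch (Count % 4) {
      default: break;                     // no leftover bytes
      case 1: Out.push_back(0); break;
      case 2: Out.push_back(0); Out.push_back(0); break;
      case 3: Out.push_back(0); Out.push_back(0); Out.push_back(0xa0); break;
      }
    }
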
if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { const MCSymbol &Sym = A->getSymbol(); - const MCSymbolData &SymData = Asm.getSymbolData(Sym); - IsResolved = (SymData.getFragment() == DF); + IsResolved = (Sym.getFragment() == DF); } // We must always generate a relocation for BL/BLX instructions if we have // a symbol to reference, as the linker relies on knowing the destination diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index f4fedeef650b..804d3534096a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -37,7 +37,7 @@ namespace { unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; - bool needsRelocateWithSymbol(const MCSymbolData &SD, + bool needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const override; }; } @@ -49,7 +49,7 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) ARMELFObjectWriter::~ARMELFObjectWriter() {} -bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, +bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const { // FIXME: This is extremely conservative. This really needs to use a // whitelist with a clear explanation for why each realocation needs to diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 0eb5a8136e88..6e3af739eca2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -22,9 +22,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFStreamer.h" -#include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" @@ -34,7 +32,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" @@ -216,7 +214,13 @@ ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { } void ARMTargetAsmStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) { - OS << "\t.thumb_set\t" << *Symbol << ", " << *Value << '\n'; + const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo(); + + OS << "\t.thumb_set\t"; + Symbol->print(OS, MAI); + OS << ", "; + Value->print(OS, MAI); + OS << '\n'; } void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) { @@ -562,17 +566,16 @@ private: MCSymbol *Start = getContext().createTempSymbol(); EmitLabel(Start); - MCSymbol *Symbol = - getContext().getOrCreateSymbol(Name + "." + - Twine(MappingSymbolCounter++)); + auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol( + Name + "." 
+ Twine(MappingSymbolCounter++))); - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - MCELF::SetType(SD, ELF::STT_NOTYPE); - MCELF::SetBinding(SD, ELF::STB_LOCAL); - SD.setExternal(false); + getAssembler().registerSymbol(*Symbol); + Symbol->setType(ELF::STT_NOTYPE); + Symbol->setBinding(ELF::STB_LOCAL); + Symbol->setExternal(false); AssignSection(Symbol, getCurrentSection().first); - const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); + const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext()); Symbol->setVariableValue(Value); } @@ -688,16 +691,16 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { using namespace ARMBuildAttrs; setAttributeItem(CPU_name, - ARMTargetParser::getArchDefaultCPUName(Arch), + ARMTargetParser::getCPUAttr(Arch), false); if (EmittedArch == ARM::AK_INVALID) setAttributeItem(CPU_arch, - ARMTargetParser::getArchDefaultCPUArch(Arch), + ARMTargetParser::getArchAttr(Arch), false); else setAttributeItem(CPU_arch, - ARMTargetParser::getArchDefaultCPUArch(EmittedArch), + ARMTargetParser::getArchAttr(EmittedArch), false); switch (Arch) { @@ -813,6 +816,9 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { /* OverwriteExisting= */ false); break; + // ABI_HardFP_use is handled in ARMAsmPrinter, so _SP_D16 is treated the same + // as _D16 here. + case ARM::FK_FPV4_SP_D16: case ARM::FK_VFPV4_D16: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4B, @@ -827,6 +833,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { // FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so // uses the FP_ARMV8_D16 build attribute. + case ARM::FK_FPV5_SP_D16: case ARM::FK_FPV5_D16: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPARMv8B, @@ -861,6 +868,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { break; case ARM::FK_SOFTVFP: + case ARM::FK_NONE: break; default: @@ -972,9 +980,9 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) { if (!Streamer.IsThumb) return; - const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol); - unsigned Type = MCELF::GetType(SD); - if (Type == ELF_STT_Func || Type == ELF_STT_GnuIFunc) + Streamer.getAssembler().registerSymbol(*Symbol); + unsigned Type = cast<MCSymbolELF>(Symbol)->getType(); + if (Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC) Streamer.EmitThumbFunc(Symbol); } @@ -1024,7 +1032,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, } // Get .ARM.extab or .ARM.exidx section - const MCSymbol *Group = FnSection.getGroup(); + const MCSymbolELF *Group = FnSection.getGroup(); if (Group) Flags |= ELF::SHF_GROUP; MCSectionELF *EHSection = @@ -1095,7 +1103,7 @@ void ARMELFStreamer::emitFnEnd() { EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex)); const MCSymbolRefExpr *FnStartRef = - MCSymbolRefExpr::Create(FnStart, + MCSymbolRefExpr::create(FnStart, MCSymbolRefExpr::VK_ARM_PREL31, getContext()); @@ -1106,7 +1114,7 @@ void ARMELFStreamer::emitFnEnd() { } else if (ExTab) { // Emit a reference to the unwind opcodes in the ".ARM.extab" section. 
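The mapping-symbol hunk above is a good specimen of the MCSymbolData removal running through this import: type, binding and externality now live on MCSymbolELF itself, and the symbol name is just a prefix plus a running counter. The naming scheme in isolation (ARM ELF mapping symbols are conventionally $a, $t and $d; this helper is hypothetical):

    #include <string>

    // "$a.0", "$t.1", ... as produced by Name + "." + Twine(Counter++) above.
    static std::string mappingSymbolName(const char *Prefix,
                                         unsigned &Counter) {
      return std::string(Prefix) + "." + std::to_string(Counter++);
    }
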
const MCSymbolRefExpr *ExTabEntryRef = - MCSymbolRefExpr::Create(ExTab, + MCSymbolRefExpr::create(ExTab, MCSymbolRefExpr::VK_ARM_PREL31, getContext()); EmitValue(ExTabEntryRef, 4); @@ -1138,7 +1146,7 @@ void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; } void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) { const MCSymbol *PersonalitySym = getContext().getOrCreateSymbol(Name); - const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::create( PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext()); visitUsedExpr(*PersonalityRef); @@ -1186,7 +1194,7 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) { // Emit personality if (Personality) { const MCSymbolRefExpr *PersonalityRef = - MCSymbolRefExpr::Create(Personality, + MCSymbolRefExpr::create(Personality, MCSymbolRefExpr::VK_ARM_PREL31, getContext()); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index caa873622ae9..1ac08159bd3d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -19,8 +19,7 @@ using namespace llvm; void ARMMCAsmInfoDarwin::anchor() { } -ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) { - Triple TheTriple(TT); +ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) { if ((TheTriple.getArch() == Triple::armeb) || (TheTriple.getArch() == Triple::thumbeb)) IsLittleEndian = false; @@ -41,8 +40,7 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) { void ARMELFMCAsmInfo::anchor() { } -ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) { - Triple TheTriple(TT); +ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) { if ((TheTriple.getArch() == Triple::armeb) || (TheTriple.getArch() == Triple::thumbeb)) IsLittleEndian = false; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index 6cb471537f6e..99a5fff5ec27 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -19,18 +19,19 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { + class Triple; class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { virtual void anchor(); public: - explicit ARMMCAsmInfoDarwin(StringRef TT); + explicit ARMMCAsmInfoDarwin(const Triple &TheTriple); }; class ARMELFMCAsmInfo : public MCAsmInfoELF { void anchor() override; public: - explicit ARMELFMCAsmInfo(StringRef TT); + explicit ARMELFMCAsmInfo(const Triple &TT); void setUseIntegratedAssembler(bool Value) override; }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 5b90de327418..2063ca6bdf3b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -16,12 +16,12 @@ using namespace llvm; #define DEBUG_TYPE "armmcexpr" const ARMMCExpr* -ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr, +ARMMCExpr::create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx) { return new (Ctx) ARMMCExpr(Kind, Expr); } -void ARMMCExpr::PrintImpl(raw_ostream &OS) const { +void ARMMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { switch (Kind) { default: llvm_unreachable("Invalid kind!"); case VK_ARM_HI16: OS << ":upper16:"; break; @@ -31,7 +31,7 @@ void ARMMCExpr::PrintImpl(raw_ostream &OS) const { const MCExpr *Expr = getSubExpr(); if (Expr->getKind() != MCExpr::SymbolRef) OS << '('; - Expr->print(OS); + Expr->print(OS, MAI); if (Expr->getKind() != MCExpr::SymbolRef) 
OS << ')'; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index a52abe7760d1..9146d4def75a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -33,15 +33,15 @@ public: /// @name Construction /// @{ - static const ARMMCExpr *Create(VariantKind Kind, const MCExpr *Expr, + static const ARMMCExpr *create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx); - static const ARMMCExpr *CreateUpper16(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_ARM_HI16, Expr, Ctx); + static const ARMMCExpr *createUpper16(const MCExpr *Expr, MCContext &Ctx) { + return create(VK_ARM_HI16, Expr, Ctx); } - static const ARMMCExpr *CreateLower16(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_ARM_LO16, Expr, Ctx); + static const ARMMCExpr *createLower16(const MCExpr *Expr, MCContext &Ctx) { + return create(VK_ARM_LO16, Expr, Ctx); } /// @} @@ -56,15 +56,15 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override { return false; } void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *FindAssociatedSection() const override { - return getSubExpr()->FindAssociatedSection(); + MCSection *findAssociatedSection() const override { + return getSubExpr()->findAssociatedSection(); } // There are no TLS ARMMCExprs at the moment. diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 30deba9a08c6..92c4d6a824ea 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -277,18 +277,17 @@ static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { return X; } -static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { - Triple TheTriple(TT); - +static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TheTriple) { MCAsmInfo *MAI; if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO()) - MAI = new ARMMCAsmInfoDarwin(TT); + MAI = new ARMMCAsmInfoDarwin(TheTriple); else if (TheTriple.isWindowsItaniumEnvironment()) MAI = new ARMCOFFMCAsmInfoGNU(); else if (TheTriple.isWindowsMSVCEnvironment()) MAI = new ARMCOFFMCAsmInfoMicrosoft(); else - MAI = new ARMELFMCAsmInfo(TT); + MAI = new ARMELFMCAsmInfo(TheTriple); unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true); MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0)); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index d4b00e6e4fb5..4468132588cf 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -26,9 +26,9 @@ public: unsigned VariantKind) override { switch(VariantKind) { case LLVMDisassembler_VariantKind_ARM_HI16: - return ARMMCExpr::CreateUpper16(SubExpr, Ctx); + return ARMMCExpr::createUpper16(SubExpr, Ctx); case LLVMDisassembler_VariantKind_ARM_LO16: - return ARMMCExpr::CreateLower16(SubExpr, Ctx); + return ARMMCExpr::createLower16(SubExpr, Ctx); default: return MCRelocationInfo::createExprForCAPIVariantKind(SubExpr, VariantKind); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 
9755330bf8c3..95d7ea7c04a3 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -17,7 +17,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCValue.h" @@ -49,12 +48,10 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter { const MCSymbol &S, uint64_t FixedValue); public: - ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, - uint32_t CPUSubtype) - : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, - /*UseAggressiveSymbolFolding=*/true) {} + ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {} - void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; @@ -152,23 +149,21 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // See <reloc.h>. const MCSymbol *A = &Target.getSymA()->getSymbol(); - const MCSymbolData *A_SD = &Asm.getSymbolData(*A); - if (!A_SD->getFragment()) + if (!A->getFragment()) Asm.getContext().reportFatalError(Fixup.getLoc(), "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Writer->getSymbolAddress(*A, Layout); uint32_t Value2 = 0; - uint64_t SecAddr = - Writer->getSectionAddress(A_SD->getFragment()->getParent()); + uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); FixedValue += SecAddr; if (const MCSymbolRefExpr *B = Target.getSymB()) { - const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbol *SB = &B->getSymbol(); - if (!B_SD->getFragment()) + if (!SB->getFragment()) Asm.getContext().reportFatalError(Fixup.getLoc(), "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); @@ -176,7 +171,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // Select the appropriate difference relocation type. Type = MachO::ARM_RELOC_HALF_SECTDIFF; Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); - FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } // Relocations are written out in reverse order, so the PAIR comes first. @@ -255,24 +250,22 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, // See <reloc.h>. 
const MCSymbol *A = &Target.getSymA()->getSymbol(); - const MCSymbolData *A_SD = &Asm.getSymbolData(*A); - if (!A_SD->getFragment()) + if (!A->getFragment()) Asm.getContext().reportFatalError(Fixup.getLoc(), "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Writer->getSymbolAddress(*A, Layout); - uint64_t SecAddr = - Writer->getSectionAddress(A_SD->getFragment()->getParent()); + uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); FixedValue += SecAddr; uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols"); - const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbol *SB = &B->getSymbol(); - if (!B_SD->getFragment()) + if (!SB->getFragment()) Asm.getContext().reportFatalError(Fixup.getLoc(), "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); @@ -280,7 +273,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, // Select the appropriate difference relocation type. Type = MachO::ARM_RELOC_SECTDIFF; Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); - FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } // Relocations are written out in reverse order, so the PAIR comes first. @@ -344,7 +337,7 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, return false; } -void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, +void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -405,7 +398,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // Resolve constant variables. if (A->isVariable()) { int64_t Res; - if (A->getVariableValue()->EvaluateAsAbsolute( + if (A->getVariableValue()->evaluateAsAbsolute( Res, Layout, Writer->getSectionAddressMap())) { FixedValue = Res; return; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index b62ae2e3429e..68736bc1decd 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -94,12 +94,12 @@ static void TrackDefUses(MachineInstr *MI, /// conservatively remove more kill flags than are necessary, but removing them /// is safer than incorrect kill flags remaining on instructions. 
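ClearKillFlags below shows the other recurring mechanical cleanup: the MIOperands cursor gives way to a range-based loop over MI->operands(). The same control flow in miniature, with plain containers standing in for the MachineInstr types:

    #include <set>
    #include <vector>

    struct Operand { bool IsReg, IsDef, IsKill; unsigned Reg; };

    // Drop kill flags from any used register, as ClearKillFlags below does.
    static void clearKillFlags(std::vector<Operand> &Ops,
                               const std::set<unsigned> &Uses) {
      for (Operand &MO : Ops) {
        if (!MO.IsReg || MO.IsDef || !MO.IsKill)
          continue;
        if (!Uses.count(MO.Reg))
          continue;
        MO.IsKill = false;
      }
    }
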
static void ClearKillFlags(MachineInstr *MI, SmallSet<unsigned, 4> &Uses) { - for (MIOperands MO(MI); MO.isValid(); ++MO) { - if (!MO->isReg() || MO->isDef() || !MO->isKill()) + for (MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || MO.isDef() || !MO.isKill()) continue; - if (!Uses.count(MO->getReg())) + if (!Uses.count(MO.getReg())) continue; - MO->setIsKill(false); + MO.setIsKill(false); } } diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 0ab1ff906c9a..d9ab824995c1 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -133,7 +133,7 @@ namespace { class Thumb2SizeReduce : public MachineFunctionPass { public: static char ID; - Thumb2SizeReduce(); + Thumb2SizeReduce(std::function<bool(const Function &)> Ftor); const Thumb2InstrInfo *TII; const ARMSubtarget *STI; @@ -198,11 +198,14 @@ namespace { }; SmallVector<MBBInfo, 8> BlockInfo; + + std::function<bool(const Function &)> PredicateFtor; }; char Thumb2SizeReduce::ID = 0; } -Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { +Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor) + : MachineFunctionPass(ID), PredicateFtor(Ftor) { OptimizeSize = MinimizeSize = false; for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { unsigned FromOpc = ReduceTable[i].WideOpc; @@ -1000,6 +1003,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { } bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { + if (PredicateFtor && !PredicateFtor(*MF.getFunction())) + return false; + STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget()); if (STI->isThumb1Only() || STI->prefers32BitThumb()) return false; @@ -1025,6 +1031,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { /// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size /// reduction pass. -FunctionPass *llvm::createThumb2SizeReductionPass() { - return new Thumb2SizeReduce(); +FunctionPass *llvm::createThumb2SizeReductionPass( + std::function<bool(const Function &)> Ftor) { + return new Thumb2SizeReduce(Ftor); } diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp index 32375968eac1..10ec6587550b 100644 --- a/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/lib/Target/BPF/BPFAsmPrinter.cpp @@ -83,5 +83,7 @@ void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Force static initialization. 
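The Thumb2SizeReduce change above threads an optional std::function predicate through the pass constructor so callers can gate the pass per function; an empty functor means run everywhere. The early-out idiom in isolation (Function here is a stand-in type):

    #include <functional>

    struct Function {}; // stand-in for llvm::Function

    // Mirrors the guard added to runOnMachineFunction above:
    //   if (PredicateFtor && !PredicateFtor(*MF.getFunction())) return false;
    static bool shouldRunOn(const std::function<bool(const Function &)> &Pred,
                            const Function &F) {
      return !Pred || Pred(F);
    }
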
extern "C" void LLVMInitializeBPFAsmPrinter() { - RegisterAsmPrinter<BPFAsmPrinter> X(TheBPFTarget); + RegisterAsmPrinter<BPFAsmPrinter> X(TheBPFleTarget); + RegisterAsmPrinter<BPFAsmPrinter> Y(TheBPFbeTarget); + RegisterAsmPrinter<BPFAsmPrinter> Z(TheBPFTarget); } diff --git a/lib/Target/BPF/BPFMCInstLower.cpp b/lib/Target/BPF/BPFMCInstLower.cpp index d608afb348cb..00bd8d9c090c 100644 --- a/lib/Target/BPF/BPFMCInstLower.cpp +++ b/lib/Target/BPF/BPFMCInstLower.cpp @@ -33,7 +33,7 @@ BPFMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { MCOperand BPFMCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); if (!MO.isJTI() && MO.getOffset()) llvm_unreachable("unknown symbol op"); @@ -63,7 +63,7 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::createExpr( - MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx)); + MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx)); break; case MachineOperand::MO_RegisterMask: continue; diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp index 9487427fef5e..3329d5f87409 100644 --- a/lib/Target/BPF/BPFTargetMachine.cpp +++ b/lib/Target/BPF/BPFTargetMachine.cpp @@ -23,19 +23,24 @@ using namespace llvm; extern "C" void LLVMInitializeBPFTarget() { // Register the target. - RegisterTargetMachine<BPFTargetMachine> X(TheBPFTarget); + RegisterTargetMachine<BPFTargetMachine> X(TheBPFleTarget); + RegisterTargetMachine<BPFTargetMachine> Y(TheBPFbeTarget); + RegisterTargetMachine<BPFTargetMachine> Z(TheBPFTarget); +} + +// DataLayout: little or big endian +static std::string computeDataLayout(StringRef TT) { + if (Triple(TT).getArch() == Triple::bpfeb) + return "E-m:e-p:64:64-i64:64-n32:64-S128"; + else + return "e-m:e-p:64:64-i64:64-n32:64-S128"; } -// DataLayout --> Little-endian, 64-bit pointer/ABI/alignment -// The stack is always 8 byte aligned -// On function prologue, the stack is created by decrementing -// its pointer. Once decremented, all references are done with positive -// offset from the stack/frame pointer. 
BPFTargetMachine::BPFTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, "e-m:e-p:64:64-i64:64-n32:64-S128", TT, CPU, FS, + : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM, OL), TLOF(make_unique<TargetLoweringObjectFileELF>()), Subtarget(TT, CPU, FS, *this) { diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 48f34e484590..7b1d9259caf9 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -25,7 +25,10 @@ using namespace llvm; namespace { class BPFAsmBackend : public MCAsmBackend { public: - BPFAsmBackend() : MCAsmBackend() {} + bool IsLittleEndian; + + BPFAsmBackend(bool IsLittleEndian) + : MCAsmBackend(), IsLittleEndian(IsLittleEndian) {} ~BPFAsmBackend() override {} void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, @@ -54,7 +57,7 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return false; for (uint64_t i = 0; i < Count; i += 8) - OW->Write64(0x15000000); + OW->write64(0x15000000); return true; } @@ -69,17 +72,28 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } assert(Fixup.getKind() == FK_PCRel_2); Value = (uint16_t)((Value - 8) / 8); - Data[Fixup.getOffset() + 2] = Value & 0xFF; - Data[Fixup.getOffset() + 3] = Value >> 8; + if (IsLittleEndian) { + Data[Fixup.getOffset() + 2] = Value & 0xFF; + Data[Fixup.getOffset() + 3] = Value >> 8; + } else { + Data[Fixup.getOffset() + 2] = Value >> 8; + Data[Fixup.getOffset() + 3] = Value & 0xFF; + } } MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { - return createBPFELFObjectWriter(OS, 0); + return createBPFELFObjectWriter(OS, 0, IsLittleEndian); } } MCAsmBackend *llvm::createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU) { - return new BPFAsmBackend(); + return new BPFAsmBackend(/*IsLittleEndian=*/true); +} + +MCAsmBackend *llvm::createBPFbeAsmBackend(const Target &T, + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU) { + return new BPFAsmBackend(/*IsLittleEndian=*/false); } diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index a5562c1a933e..05ba6183e322 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -47,7 +47,8 @@ unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target, } } -MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { +MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, bool IsLittleEndian) { MCELFObjectTargetWriter *MOTW = new BPFELFObjectWriter(OSABI); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); + return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h index ab61ae7ae662..d63bbf49294e 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h @@ -16,13 +16,18 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/ADT/Triple.h" namespace llvm { class Target; +class Triple; class BPFMCAsmInfo : public MCAsmInfo { public: - explicit BPFMCAsmInfo(StringRef TT) { + explicit BPFMCAsmInfo(const 
Triple &TT) { + if (TT.getArch() == Triple::bpfeb) + IsLittleEndian = false; + PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp index ba8a874e4966..dc4ede30f191 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -30,9 +30,11 @@ class BPFMCCodeEmitter : public MCCodeEmitter { BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete; void operator=(const BPFMCCodeEmitter &) = delete; const MCRegisterInfo &MRI; + bool IsLittleEndian; public: - BPFMCCodeEmitter(const MCRegisterInfo &mri) : MRI(mri) {} + BPFMCCodeEmitter(const MCRegisterInfo &mri, bool IsLittleEndian) + : MRI(mri), IsLittleEndian(IsLittleEndian) {} ~BPFMCCodeEmitter() {} @@ -61,7 +63,13 @@ public: MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx) { - return new BPFMCCodeEmitter(MRI); + return new BPFMCCodeEmitter(MRI, true); +} + +MCCodeEmitter *llvm::createBPFbeMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new BPFMCCodeEmitter(MRI, false); } unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI, @@ -91,59 +99,53 @@ unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI, return 0; } -// Emit one byte through output stream -void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) { - OS << (char)C; - ++CurByte; -} - -// Emit a series of bytes (little endian) -void EmitLEConstant(uint64_t Val, unsigned Size, unsigned &CurByte, - raw_ostream &OS) { - assert(Size <= 8 && "size too big in emit constant"); - - for (unsigned i = 0; i != Size; ++i) { - EmitByte(Val & 255, CurByte, OS); - Val >>= 8; - } -} - -// Emit a series of bytes (big endian) -void EmitBEConstant(uint64_t Val, unsigned Size, unsigned &CurByte, - raw_ostream &OS) { - assert(Size <= 8 && "size too big in emit constant"); - - for (int i = (Size - 1) * 8; i >= 0; i -= 8) - EmitByte((Val >> i) & 255, CurByte, OS); +static uint8_t SwapBits(uint8_t Val) +{ + return (Val & 0x0F) << 4 | (Val & 0xF0) >> 4; } void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { unsigned Opcode = MI.getOpcode(); - // Keep track of the current byte being emitted - unsigned CurByte = 0; + support::endian::Writer<support::little> LE(OS); + support::endian::Writer<support::big> BE(OS); if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) { uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI); - EmitByte(Value >> 56, CurByte, OS); - EmitByte(((Value >> 48) & 0xff), CurByte, OS); - EmitLEConstant(0, 2, CurByte, OS); - EmitLEConstant(Value & 0xffffFFFF, 4, CurByte, OS); + LE.write<uint8_t>(Value >> 56); + if (IsLittleEndian) + LE.write<uint8_t>((Value >> 48) & 0xff); + else + LE.write<uint8_t>(SwapBits((Value >> 48) & 0xff)); + LE.write<uint16_t>(0); + if (IsLittleEndian) + LE.write<uint32_t>(Value & 0xffffFFFF); + else + BE.write<uint32_t>(Value & 0xffffFFFF); const MCOperand &MO = MI.getOperand(1); uint64_t Imm = MO.isImm() ? 
MO.getImm() : 0; - EmitByte(0, CurByte, OS); - EmitByte(0, CurByte, OS); - EmitLEConstant(0, 2, CurByte, OS); - EmitLEConstant(Imm >> 32, 4, CurByte, OS); + LE.write<uint8_t>(0); + LE.write<uint8_t>(0); + LE.write<uint16_t>(0); + if (IsLittleEndian) + LE.write<uint32_t>(Imm >> 32); + else + BE.write<uint32_t>(Imm >> 32); } else { // Get instruction encoding and emit it uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI); - EmitByte(Value >> 56, CurByte, OS); - EmitByte((Value >> 48) & 0xff, CurByte, OS); - EmitLEConstant((Value >> 32) & 0xffff, 2, CurByte, OS); - EmitLEConstant(Value & 0xffffFFFF, 4, CurByte, OS); + LE.write<uint8_t>(Value >> 56); + if (IsLittleEndian) { + LE.write<uint8_t>((Value >> 48) & 0xff); + LE.write<uint16_t>((Value >> 32) & 0xffff); + LE.write<uint32_t>(Value & 0xffffFFFF); + } else { + LE.write<uint8_t>(SwapBits((Value >> 48) & 0xff)); + BE.write<uint16_t>((Value >> 32) & 0xffff); + BE.write<uint32_t>(Value & 0xffffFFFF); + } } } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index c4cf4b824508..7cedba90a746 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -79,32 +79,43 @@ static MCInstPrinter *createBPFMCInstPrinter(const Triple &T, } extern "C" void LLVMInitializeBPFTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfo<BPFMCAsmInfo> X(TheBPFTarget); + for (Target *T : {&TheBPFleTarget, &TheBPFbeTarget, &TheBPFTarget}) { + // Register the MC asm info. + RegisterMCAsmInfo<BPFMCAsmInfo> X(*T); - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheBPFTarget, createBPFMCCodeGenInfo); + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(*T, createBPFMCCodeGenInfo); - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheBPFTarget, createBPFMCInstrInfo); + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createBPFMCInstrInfo); - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheBPFTarget, createBPFMCRegisterInfo); + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createBPFMCRegisterInfo); - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheBPFTarget, - createBPFMCSubtargetInfo); + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, + createBPFMCSubtargetInfo); - // Register the MC code emitter - TargetRegistry::RegisterMCCodeEmitter(TheBPFTarget, - llvm::createBPFMCCodeEmitter); + // Register the object streamer + TargetRegistry::RegisterELFStreamer(*T, createBPFMCStreamer); - // Register the ASM Backend - TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFAsmBackend); + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createBPFMCInstPrinter); + } - // Register the object streamer - TargetRegistry::RegisterELFStreamer(TheBPFTarget, createBPFMCStreamer); + // Register the MC code emitter + TargetRegistry::RegisterMCCodeEmitter(TheBPFleTarget, createBPFMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheBPFbeTarget, createBPFbeMCCodeEmitter); - // Register the MCInstPrinter. 
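In the encodeInstruction hunk above, the only field that is not a plain byte-order choice is the register byte: its two 4-bit fields trade places on big-endian BPF, which is what SwapBits handles. That helper with a quick self-check:

    #include <cassert>
    #include <cstdint>

    // Nibble swap for the register byte, as in the big-endian path above.
    static uint8_t SwapBits(uint8_t Val) {
      return (Val & 0x0F) << 4 | (Val & 0xF0) >> 4;
    }

    int main() {
      assert(SwapBits(0x21) == 0x12);
      return 0;
    }
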
- TargetRegistry::RegisterMCInstPrinter(TheBPFTarget, createBPFMCInstPrinter); + // Register the ASM Backend + TargetRegistry::RegisterMCAsmBackend(TheBPFleTarget, createBPFAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheBPFbeTarget, createBPFbeAsmBackend); + + if (sys::IsLittleEndianHost) { + TargetRegistry::RegisterMCCodeEmitter(TheBPFTarget, createBPFMCCodeEmitter); + TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFAsmBackend); + } else { + TargetRegistry::RegisterMCCodeEmitter(TheBPFTarget, createBPFbeMCCodeEmitter); + TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFbeAsmBackend); + } } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index ce08b7cf76e6..a9ba7d990e17 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -30,16 +30,24 @@ class StringRef; class raw_ostream; class raw_pwrite_stream; +extern Target TheBPFleTarget; +extern Target TheBPFbeTarget; extern Target TheBPFTarget; MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); +MCCodeEmitter *createBPFbeMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx); MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); +MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); -MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); +MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, bool IsLittleEndian); } // Defines symbolic names for BPF registers. This defines a mapping from diff --git a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp index 87716e6775cf..a16dbae867b2 100644 --- a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp +++ b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp @@ -11,8 +11,18 @@ #include "llvm/Support/TargetRegistry.h" using namespace llvm; -Target llvm::TheBPFTarget; +namespace llvm { +Target TheBPFleTarget; +Target TheBPFbeTarget; +Target TheBPFTarget; +} extern "C" void LLVMInitializeBPFTargetInfo() { - RegisterTarget<Triple::bpf, /*HasJIT=*/true> X(TheBPFTarget, "bpf", "BPF"); + TargetRegistry::RegisterTarget(TheBPFTarget, "bpf", + "BPF (host endian)", + [](Triple::ArchType) { return false; }, true); + RegisterTarget<Triple::bpfel, /*HasJIT=*/true> X( + TheBPFleTarget, "bpfel", "BPF (little endian)"); + RegisterTarget<Triple::bpfeb, /*HasJIT=*/true> Y( + TheBPFbeTarget, "bpfeb", "BPF (big endian)"); } diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 1805437b12f7..e6d0199952f4 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMTarget TargetLoweringObjectFile.cpp TargetMachine.cpp TargetMachineC.cpp + TargetRecip.cpp TargetSubtargetInfo.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index f1a7127e8fd9..b8377986ecc0 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -645,8 +645,7 @@ void CppWriter::printType(Type* Ty) { if (DefinedTypes.find(Ty) == DefinedTypes.end()) { std::string elemName(getCppName(ET)); Out << "ArrayType* " << typeName << " = ArrayType::get(" - << elemName - << ", " << utostr(AT->getNumElements()) << ");"; + << elemName << ", " << AT->getNumElements() << ");"; nl(Out); } 
break; @@ -658,8 +657,7 @@ void CppWriter::printType(Type* Ty) { if (DefinedTypes.find(Ty) == DefinedTypes.end()) { std::string elemName(getCppName(ET)); Out << "PointerType* " << typeName << " = PointerType::get(" - << elemName - << ", " << utostr(PT->getAddressSpace()) << ");"; + << elemName << ", " << PT->getAddressSpace() << ");"; nl(Out); } break; @@ -671,8 +669,7 @@ void CppWriter::printType(Type* Ty) { if (DefinedTypes.find(Ty) == DefinedTypes.end()) { std::string elemName(getCppName(ET)); Out << "VectorType* " << typeName << " = VectorType::get(" - << elemName - << ", " << utostr(PT->getNumElements()) << ");"; + << elemName << ", " << PT->getNumElements() << ");"; nl(Out); } break; @@ -1029,7 +1026,7 @@ void CppWriter::printVariableHead(const GlobalVariable *GV) { } if (GV->getAlignment()) { printCppName(GV); - Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");"; + Out << "->setAlignment(" << GV->getAlignment() << ");"; nl(Out); } if (GV->getVisibility() != GlobalValue::DefaultVisibility) { diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index a60d1e471944..14f9d777580c 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -7,9 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" + #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCExpr.h" @@ -27,6 +29,7 @@ #include <vector> using namespace llvm; +using namespace Hexagon; #define DEBUG_TYPE "hexagon-disassembler" @@ -37,9 +40,14 @@ namespace { /// \brief Hexagon disassembler for all Hexagon platforms. 
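The rewritten disassembler below no longer decodes one word at a time; it accumulates whole packets into a BUNDLE MCInst, consuming 4-byte words until the parse bits mark the end of the packet. A standalone sketch of that outer loop (the parse-bit constants here are illustrative placeholders, not the real HexagonII values):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    constexpr unsigned InstrSize = 4;           // HEXAGON_INSTR_SIZE
    constexpr uint32_t ParseMask = 0xc000;      // illustrative
    constexpr uint32_t ParsePacketEnd = 0xc000; // illustrative

    // Slice 32-bit words off the byte stream until the packet closes,
    // mirroring the shape of the new getInstruction() loop below.
    static std::vector<uint32_t> readPacket(const uint8_t *Bytes, size_t Size) {
      std::vector<uint32_t> Packet;
      while (Size >= InstrSize) {
        uint32_t Insn;
        std::memcpy(&Insn, Bytes, sizeof(Insn)); // little-endian host assumed
        Packet.push_back(Insn);
        Bytes += InstrSize;
        Size -= InstrSize;
        if ((Insn & ParseMask) == ParsePacketEnd)
          break;
      }
      return Packet;
    }
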
class HexagonDisassembler : public MCDisassembler { public: + std::unique_ptr<MCInst *> CurrentBundle; HexagonDisassembler(MCSubtargetInfo const &STI, MCContext &Ctx) - : MCDisassembler(STI, Ctx) {} + : MCDisassembler(STI, Ctx), CurrentBundle(new MCInst *) {} + DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, raw_ostream &CStream, + bool &Complete) const; DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &VStream, @@ -48,37 +56,43 @@ public: } static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, void const *Decoder); + uint64_t Address, + void const *Decoder); + +static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op, + raw_ostream &os); +static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst); static const uint16_t IntRegDecoderTable[] = { - Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, - Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, - Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, - Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, - Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, - Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, - Hexagon::R30, Hexagon::R31 }; + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, + Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, + Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, + Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, + Hexagon::R30, Hexagon::R31}; -static const uint16_t PredRegDecoderTable[] = { Hexagon::P0, Hexagon::P1, -Hexagon::P2, Hexagon::P3 }; +static const uint16_t PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, + Hexagon::P2, Hexagon::P3}; static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, - const uint16_t Table[], size_t Size) { + const uint16_t Table[], size_t Size) { if (RegNo < Size) { Inst.addOperand(MCOperand::createReg(Table[RegNo])); return MCDisassembler::Success; - } - else + } else return MCDisassembler::Fail; } static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { + uint64_t /*Address*/, + void const *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -88,13 +102,13 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, const void *Decoder) { + uint64_t /*Address*/, + const void *Decoder) { static const uint16_t CtrlRegDecoderTable[] = { - Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, - Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7, - Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, - Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH - }; + Hexagon::SA0, Hexagon::LC0, 
Hexagon::SA1, Hexagon::LC1, + Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7, + Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, + Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH}; if (RegNo >= sizeof(CtrlRegDecoderTable) / sizeof(CtrlRegDecoderTable[0])) return MCDisassembler::Fail; @@ -108,17 +122,15 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, void const *Decoder) { + uint64_t /*Address*/, + void const *Decoder) { static const uint16_t CtrlReg64DecoderTable[] = { - Hexagon::C1_0, Hexagon::NoRegister, - Hexagon::C3_2, Hexagon::NoRegister, - Hexagon::NoRegister, Hexagon::NoRegister, - Hexagon::C7_6, Hexagon::NoRegister, - Hexagon::C9_8, Hexagon::NoRegister, - Hexagon::C11_10, Hexagon::NoRegister, - Hexagon::CS, Hexagon::NoRegister, - Hexagon::UPC, Hexagon::NoRegister - }; + Hexagon::C1_0, Hexagon::NoRegister, Hexagon::C3_2, + Hexagon::NoRegister, Hexagon::NoRegister, Hexagon::NoRegister, + Hexagon::C7_6, Hexagon::NoRegister, Hexagon::C9_8, + Hexagon::NoRegister, Hexagon::C11_10, Hexagon::NoRegister, + Hexagon::CS, Hexagon::NoRegister, Hexagon::UPC, + Hexagon::NoRegister}; if (RegNo >= sizeof(CtrlReg64DecoderTable) / sizeof(CtrlReg64DecoderTable[0])) return MCDisassembler::Fail; @@ -132,7 +144,8 @@ static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, const void *Decoder) { + uint64_t /*Address*/, + const void *Decoder) { unsigned Register = 0; switch (RegNo) { case 0: @@ -149,22 +162,21 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, } static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, const void *Decoder) { + uint64_t /*Address*/, + const void *Decoder) { static const uint16_t DoubleRegDecoderTable[] = { - Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, - Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, - Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, - Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15 - }; + Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, + Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, + Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, + Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15}; - return (DecodeRegisterClass(Inst, RegNo >> 1, - DoubleRegDecoderTable, - sizeof (DoubleRegDecoderTable))); + return (DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable, + sizeof(DoubleRegDecoderTable))); } static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { + uint64_t /*Address*/, + void const *Decoder) { if (RegNo > 3) return MCDisassembler::Fail; @@ -191,17 +203,687 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, uint64_t Address, raw_ostream &os, raw_ostream &cs) const { - Size = 4; - if (Bytes.size() < 4) - return MCDisassembler::Fail; + DecodeStatus Result = DecodeStatus::Success; + bool Complete = false; + Size = 0; + + *CurrentBundle = &MI; + MI.setOpcode(Hexagon::BUNDLE); + MI.addOperand(MCOperand::createImm(0)); + while (Result == Success && Complete == false) { + if (Bytes.size() < HEXAGON_INSTR_SIZE) + return MCDisassembler::Fail; + MCInst *Inst = new (getContext()) MCInst; + Result = getSingleInstruction(*Inst, MI, Bytes, Address, os, cs, 
Complete); + MI.addOperand(MCOperand::createInst(Inst)); + Size += HEXAGON_INSTR_SIZE; + Bytes = Bytes.slice(HEXAGON_INSTR_SIZE); + } + return Result; +} + +DecodeStatus HexagonDisassembler::getSingleInstruction( + MCInst &MI, MCInst &MCB, ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &os, raw_ostream &cs, bool &Complete) const { + assert(Bytes.size() >= HEXAGON_INSTR_SIZE); - uint32_t insn = + uint32_t Instruction = llvm::support::endian::read<uint32_t, llvm::support::little, llvm::support::unaligned>(Bytes.data()); - // Remove parse bits. - insn &= ~static_cast<uint32_t>(HexagonII::InstParseBits::INST_PARSE_MASK); - DecodeStatus Result = decodeInstruction(DecoderTable32, MI, insn, Address, this, STI); - HexagonMCInstrInfo::AppendImplicitOperands(MI); + auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB); + if ((Instruction & HexagonII::INST_PARSE_MASK) == + HexagonII::INST_PARSE_LOOP_END) { + if (BundleSize == 0) + HexagonMCInstrInfo::setInnerLoop(MCB); + else if (BundleSize == 1) + HexagonMCInstrInfo::setOuterLoop(MCB); + else + return DecodeStatus::Fail; + } + + DecodeStatus Result = DecodeStatus::Success; + if ((Instruction & HexagonII::INST_PARSE_MASK) == + HexagonII::INST_PARSE_DUPLEX) { + // Determine the instruction class of each instruction in the duplex. + unsigned duplexIClass, IClassLow, IClassHigh; + + duplexIClass = ((Instruction >> 28) & 0xe) | ((Instruction >> 13) & 0x1); + switch (duplexIClass) { + default: + return MCDisassembler::Fail; + case 0: + IClassLow = HexagonII::HSIG_L1; + IClassHigh = HexagonII::HSIG_L1; + break; + case 1: + IClassLow = HexagonII::HSIG_L2; + IClassHigh = HexagonII::HSIG_L1; + break; + case 2: + IClassLow = HexagonII::HSIG_L2; + IClassHigh = HexagonII::HSIG_L2; + break; + case 3: + IClassLow = HexagonII::HSIG_A; + IClassHigh = HexagonII::HSIG_A; + break; + case 4: + IClassLow = HexagonII::HSIG_L1; + IClassHigh = HexagonII::HSIG_A; + break; + case 5: + IClassLow = HexagonII::HSIG_L2; + IClassHigh = HexagonII::HSIG_A; + break; + case 6: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_A; + break; + case 7: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_A; + break; + case 8: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_L1; + break; + case 9: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_L2; + break; + case 10: + IClassLow = HexagonII::HSIG_S1; + IClassHigh = HexagonII::HSIG_S1; + break; + case 11: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_S1; + break; + case 12: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_L1; + break; + case 13: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_L2; + break; + case 14: + IClassLow = HexagonII::HSIG_S2; + IClassHigh = HexagonII::HSIG_S2; + break; + } + + // Set the MCInst to be a duplex instruction. Which one doesn't matter. + MI.setOpcode(Hexagon::DuplexIClass0); + + // Decode each instruction in the duplex. + // Create an MCInst for each instruction. 
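The duplex path that follows packs two sub-instructions into one 32-bit word: each half is 13 bits wide, and the duplex class is reassembled from bits 31:29 and bit 13. The three field extractions in isolation:

    #include <cstdint>

    struct DuplexFields { unsigned IClass, InstLow, InstHigh; };

    // Field split used by the duplex decoding below.
    static DuplexFields splitDuplex(uint32_t Instruction) {
      DuplexFields F;
      F.IClass   = ((Instruction >> 28) & 0xe) | ((Instruction >> 13) & 0x1);
      F.InstLow  = Instruction & 0x1fff;
      F.InstHigh = (Instruction >> 16) & 0x1fff;
      return F;
    }
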
+ unsigned instLow = Instruction & 0x1fff; + unsigned instHigh = (Instruction >> 16) & 0x1fff; + unsigned opLow; + if (GetSubinstOpcode(IClassLow, instLow, opLow, os) != + MCDisassembler::Success) + return MCDisassembler::Fail; + unsigned opHigh; + if (GetSubinstOpcode(IClassHigh, instHigh, opHigh, os) != + MCDisassembler::Success) + return MCDisassembler::Fail; + MCInst *MILow = new (getContext()) MCInst; + MILow->setOpcode(opLow); + MCInst *MIHigh = new (getContext()) MCInst; + MIHigh->setOpcode(opHigh); + AddSubinstOperands(MILow, opLow, instLow); + AddSubinstOperands(MIHigh, opHigh, instHigh); + // see ConvertToSubInst() in + // lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp + + // Add the duplex instruction MCInsts as operands to the passed in MCInst. + MCOperand OPLow = MCOperand::createInst(MILow); + MCOperand OPHigh = MCOperand::createInst(MIHigh); + MI.addOperand(OPLow); + MI.addOperand(OPHigh); + Complete = true; + } else { + if ((Instruction & HexagonII::INST_PARSE_MASK) == + HexagonII::INST_PARSE_PACKET_END) + Complete = true; + // Calling the auto-generated decoder function. + Result = + decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI); + } + return Result; } + +// These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td +enum subInstBinaryValues { + V4_SA1_addi_BITS = 0x0000, + V4_SA1_addi_MASK = 0x1800, + V4_SA1_addrx_BITS = 0x1800, + V4_SA1_addrx_MASK = 0x1f00, + V4_SA1_addsp_BITS = 0x0c00, + V4_SA1_addsp_MASK = 0x1c00, + V4_SA1_and1_BITS = 0x1200, + V4_SA1_and1_MASK = 0x1f00, + V4_SA1_clrf_BITS = 0x1a70, + V4_SA1_clrf_MASK = 0x1e70, + V4_SA1_clrfnew_BITS = 0x1a50, + V4_SA1_clrfnew_MASK = 0x1e70, + V4_SA1_clrt_BITS = 0x1a60, + V4_SA1_clrt_MASK = 0x1e70, + V4_SA1_clrtnew_BITS = 0x1a40, + V4_SA1_clrtnew_MASK = 0x1e70, + V4_SA1_cmpeqi_BITS = 0x1900, + V4_SA1_cmpeqi_MASK = 0x1f00, + V4_SA1_combine0i_BITS = 0x1c00, + V4_SA1_combine0i_MASK = 0x1d18, + V4_SA1_combine1i_BITS = 0x1c08, + V4_SA1_combine1i_MASK = 0x1d18, + V4_SA1_combine2i_BITS = 0x1c10, + V4_SA1_combine2i_MASK = 0x1d18, + V4_SA1_combine3i_BITS = 0x1c18, + V4_SA1_combine3i_MASK = 0x1d18, + V4_SA1_combinerz_BITS = 0x1d08, + V4_SA1_combinerz_MASK = 0x1d08, + V4_SA1_combinezr_BITS = 0x1d00, + V4_SA1_combinezr_MASK = 0x1d08, + V4_SA1_dec_BITS = 0x1300, + V4_SA1_dec_MASK = 0x1f00, + V4_SA1_inc_BITS = 0x1100, + V4_SA1_inc_MASK = 0x1f00, + V4_SA1_seti_BITS = 0x0800, + V4_SA1_seti_MASK = 0x1c00, + V4_SA1_setin1_BITS = 0x1a00, + V4_SA1_setin1_MASK = 0x1e40, + V4_SA1_sxtb_BITS = 0x1500, + V4_SA1_sxtb_MASK = 0x1f00, + V4_SA1_sxth_BITS = 0x1400, + V4_SA1_sxth_MASK = 0x1f00, + V4_SA1_tfr_BITS = 0x1000, + V4_SA1_tfr_MASK = 0x1f00, + V4_SA1_zxtb_BITS = 0x1700, + V4_SA1_zxtb_MASK = 0x1f00, + V4_SA1_zxth_BITS = 0x1600, + V4_SA1_zxth_MASK = 0x1f00, + V4_SL1_loadri_io_BITS = 0x0000, + V4_SL1_loadri_io_MASK = 0x1000, + V4_SL1_loadrub_io_BITS = 0x1000, + V4_SL1_loadrub_io_MASK = 0x1000, + V4_SL2_deallocframe_BITS = 0x1f00, + V4_SL2_deallocframe_MASK = 0x1fc0, + V4_SL2_jumpr31_BITS = 0x1fc0, + V4_SL2_jumpr31_MASK = 0x1fc4, + V4_SL2_jumpr31_f_BITS = 0x1fc5, + V4_SL2_jumpr31_f_MASK = 0x1fc7, + V4_SL2_jumpr31_fnew_BITS = 0x1fc7, + V4_SL2_jumpr31_fnew_MASK = 0x1fc7, + V4_SL2_jumpr31_t_BITS = 0x1fc4, + V4_SL2_jumpr31_t_MASK = 0x1fc7, + V4_SL2_jumpr31_tnew_BITS = 0x1fc6, + V4_SL2_jumpr31_tnew_MASK = 0x1fc7, + V4_SL2_loadrb_io_BITS = 0x1000, + V4_SL2_loadrb_io_MASK = 0x1800, + V4_SL2_loadrd_sp_BITS = 0x1e00, + V4_SL2_loadrd_sp_MASK = 0x1f00, + V4_SL2_loadrh_io_BITS = 0x0000, + V4_SL2_loadrh_io_MASK = 
0x1800, + V4_SL2_loadri_sp_BITS = 0x1c00, + V4_SL2_loadri_sp_MASK = 0x1e00, + V4_SL2_loadruh_io_BITS = 0x0800, + V4_SL2_loadruh_io_MASK = 0x1800, + V4_SL2_return_BITS = 0x1f40, + V4_SL2_return_MASK = 0x1fc4, + V4_SL2_return_f_BITS = 0x1f45, + V4_SL2_return_f_MASK = 0x1fc7, + V4_SL2_return_fnew_BITS = 0x1f47, + V4_SL2_return_fnew_MASK = 0x1fc7, + V4_SL2_return_t_BITS = 0x1f44, + V4_SL2_return_t_MASK = 0x1fc7, + V4_SL2_return_tnew_BITS = 0x1f46, + V4_SL2_return_tnew_MASK = 0x1fc7, + V4_SS1_storeb_io_BITS = 0x1000, + V4_SS1_storeb_io_MASK = 0x1000, + V4_SS1_storew_io_BITS = 0x0000, + V4_SS1_storew_io_MASK = 0x1000, + V4_SS2_allocframe_BITS = 0x1c00, + V4_SS2_allocframe_MASK = 0x1e00, + V4_SS2_storebi0_BITS = 0x1200, + V4_SS2_storebi0_MASK = 0x1f00, + V4_SS2_storebi1_BITS = 0x1300, + V4_SS2_storebi1_MASK = 0x1f00, + V4_SS2_stored_sp_BITS = 0x0a00, + V4_SS2_stored_sp_MASK = 0x1e00, + V4_SS2_storeh_io_BITS = 0x0000, + V4_SS2_storeh_io_MASK = 0x1800, + V4_SS2_storew_sp_BITS = 0x0800, + V4_SS2_storew_sp_MASK = 0x1e00, + V4_SS2_storewi0_BITS = 0x1000, + V4_SS2_storewi0_MASK = 0x1f00, + V4_SS2_storewi1_BITS = 0x1100, + V4_SS2_storewi1_MASK = 0x1f00 +}; + +static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op, + raw_ostream &os) { + switch (IClass) { + case HexagonII::HSIG_L1: + if ((inst & V4_SL1_loadri_io_MASK) == V4_SL1_loadri_io_BITS) + op = Hexagon::V4_SL1_loadri_io; + else if ((inst & V4_SL1_loadrub_io_MASK) == V4_SL1_loadrub_io_BITS) + op = Hexagon::V4_SL1_loadrub_io; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_L2: + if ((inst & V4_SL2_deallocframe_MASK) == V4_SL2_deallocframe_BITS) + op = Hexagon::V4_SL2_deallocframe; + else if ((inst & V4_SL2_jumpr31_MASK) == V4_SL2_jumpr31_BITS) + op = Hexagon::V4_SL2_jumpr31; + else if ((inst & V4_SL2_jumpr31_f_MASK) == V4_SL2_jumpr31_f_BITS) + op = Hexagon::V4_SL2_jumpr31_f; + else if ((inst & V4_SL2_jumpr31_fnew_MASK) == V4_SL2_jumpr31_fnew_BITS) + op = Hexagon::V4_SL2_jumpr31_fnew; + else if ((inst & V4_SL2_jumpr31_t_MASK) == V4_SL2_jumpr31_t_BITS) + op = Hexagon::V4_SL2_jumpr31_t; + else if ((inst & V4_SL2_jumpr31_tnew_MASK) == V4_SL2_jumpr31_tnew_BITS) + op = Hexagon::V4_SL2_jumpr31_tnew; + else if ((inst & V4_SL2_loadrb_io_MASK) == V4_SL2_loadrb_io_BITS) + op = Hexagon::V4_SL2_loadrb_io; + else if ((inst & V4_SL2_loadrd_sp_MASK) == V4_SL2_loadrd_sp_BITS) + op = Hexagon::V4_SL2_loadrd_sp; + else if ((inst & V4_SL2_loadrh_io_MASK) == V4_SL2_loadrh_io_BITS) + op = Hexagon::V4_SL2_loadrh_io; + else if ((inst & V4_SL2_loadri_sp_MASK) == V4_SL2_loadri_sp_BITS) + op = Hexagon::V4_SL2_loadri_sp; + else if ((inst & V4_SL2_loadruh_io_MASK) == V4_SL2_loadruh_io_BITS) + op = Hexagon::V4_SL2_loadruh_io; + else if ((inst & V4_SL2_return_MASK) == V4_SL2_return_BITS) + op = Hexagon::V4_SL2_return; + else if ((inst & V4_SL2_return_f_MASK) == V4_SL2_return_f_BITS) + op = Hexagon::V4_SL2_return_f; + else if ((inst & V4_SL2_return_fnew_MASK) == V4_SL2_return_fnew_BITS) + op = Hexagon::V4_SL2_return_fnew; + else if ((inst & V4_SL2_return_t_MASK) == V4_SL2_return_t_BITS) + op = Hexagon::V4_SL2_return_t; + else if ((inst & V4_SL2_return_tnew_MASK) == V4_SL2_return_tnew_BITS) + op = Hexagon::V4_SL2_return_tnew; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_A: + if ((inst & V4_SA1_addi_MASK) == V4_SA1_addi_BITS) + op = Hexagon::V4_SA1_addi; + else if ((inst & V4_SA1_addrx_MASK) == V4_SA1_addrx_BITS) + op = 
Hexagon::V4_SA1_addrx; + else if ((inst & V4_SA1_addsp_MASK) == V4_SA1_addsp_BITS) + op = Hexagon::V4_SA1_addsp; + else if ((inst & V4_SA1_and1_MASK) == V4_SA1_and1_BITS) + op = Hexagon::V4_SA1_and1; + else if ((inst & V4_SA1_clrf_MASK) == V4_SA1_clrf_BITS) + op = Hexagon::V4_SA1_clrf; + else if ((inst & V4_SA1_clrfnew_MASK) == V4_SA1_clrfnew_BITS) + op = Hexagon::V4_SA1_clrfnew; + else if ((inst & V4_SA1_clrt_MASK) == V4_SA1_clrt_BITS) + op = Hexagon::V4_SA1_clrt; + else if ((inst & V4_SA1_clrtnew_MASK) == V4_SA1_clrtnew_BITS) + op = Hexagon::V4_SA1_clrtnew; + else if ((inst & V4_SA1_cmpeqi_MASK) == V4_SA1_cmpeqi_BITS) + op = Hexagon::V4_SA1_cmpeqi; + else if ((inst & V4_SA1_combine0i_MASK) == V4_SA1_combine0i_BITS) + op = Hexagon::V4_SA1_combine0i; + else if ((inst & V4_SA1_combine1i_MASK) == V4_SA1_combine1i_BITS) + op = Hexagon::V4_SA1_combine1i; + else if ((inst & V4_SA1_combine2i_MASK) == V4_SA1_combine2i_BITS) + op = Hexagon::V4_SA1_combine2i; + else if ((inst & V4_SA1_combine3i_MASK) == V4_SA1_combine3i_BITS) + op = Hexagon::V4_SA1_combine3i; + else if ((inst & V4_SA1_combinerz_MASK) == V4_SA1_combinerz_BITS) + op = Hexagon::V4_SA1_combinerz; + else if ((inst & V4_SA1_combinezr_MASK) == V4_SA1_combinezr_BITS) + op = Hexagon::V4_SA1_combinezr; + else if ((inst & V4_SA1_dec_MASK) == V4_SA1_dec_BITS) + op = Hexagon::V4_SA1_dec; + else if ((inst & V4_SA1_inc_MASK) == V4_SA1_inc_BITS) + op = Hexagon::V4_SA1_inc; + else if ((inst & V4_SA1_seti_MASK) == V4_SA1_seti_BITS) + op = Hexagon::V4_SA1_seti; + else if ((inst & V4_SA1_setin1_MASK) == V4_SA1_setin1_BITS) + op = Hexagon::V4_SA1_setin1; + else if ((inst & V4_SA1_sxtb_MASK) == V4_SA1_sxtb_BITS) + op = Hexagon::V4_SA1_sxtb; + else if ((inst & V4_SA1_sxth_MASK) == V4_SA1_sxth_BITS) + op = Hexagon::V4_SA1_sxth; + else if ((inst & V4_SA1_tfr_MASK) == V4_SA1_tfr_BITS) + op = Hexagon::V4_SA1_tfr; + else if ((inst & V4_SA1_zxtb_MASK) == V4_SA1_zxtb_BITS) + op = Hexagon::V4_SA1_zxtb; + else if ((inst & V4_SA1_zxth_MASK) == V4_SA1_zxth_BITS) + op = Hexagon::V4_SA1_zxth; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_S1: + if ((inst & V4_SS1_storeb_io_MASK) == V4_SS1_storeb_io_BITS) + op = Hexagon::V4_SS1_storeb_io; + else if ((inst & V4_SS1_storew_io_MASK) == V4_SS1_storew_io_BITS) + op = Hexagon::V4_SS1_storew_io; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + case HexagonII::HSIG_S2: + if ((inst & V4_SS2_allocframe_MASK) == V4_SS2_allocframe_BITS) + op = Hexagon::V4_SS2_allocframe; + else if ((inst & V4_SS2_storebi0_MASK) == V4_SS2_storebi0_BITS) + op = Hexagon::V4_SS2_storebi0; + else if ((inst & V4_SS2_storebi1_MASK) == V4_SS2_storebi1_BITS) + op = Hexagon::V4_SS2_storebi1; + else if ((inst & V4_SS2_stored_sp_MASK) == V4_SS2_stored_sp_BITS) + op = Hexagon::V4_SS2_stored_sp; + else if ((inst & V4_SS2_storeh_io_MASK) == V4_SS2_storeh_io_BITS) + op = Hexagon::V4_SS2_storeh_io; + else if ((inst & V4_SS2_storew_sp_MASK) == V4_SS2_storew_sp_BITS) + op = Hexagon::V4_SS2_storew_sp; + else if ((inst & V4_SS2_storewi0_MASK) == V4_SS2_storewi0_BITS) + op = Hexagon::V4_SS2_storewi0; + else if ((inst & V4_SS2_storewi1_MASK) == V4_SS2_storewi1_BITS) + op = Hexagon::V4_SS2_storewi1; + else { + os << "<unknown subinstruction>"; + return MCDisassembler::Fail; + } + break; + default: + os << "<unknown>"; + return MCDisassembler::Fail; + } + return MCDisassembler::Success; +} + +static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) { + if 
(encoded_reg < 8) + return Hexagon::R0 + encoded_reg; + else if (encoded_reg < 16) + return Hexagon::R0 + encoded_reg + 8; + return Hexagon::NoRegister; +} + +static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) { + if (encoded_dreg < 4) + return Hexagon::D0 + encoded_dreg; + else if (encoded_dreg < 8) + return Hexagon::D0 + encoded_dreg + 4; + return Hexagon::NoRegister; +} + +static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { + int64_t operand; + MCOperand Op; + switch (opcode) { + case Hexagon::V4_SL2_deallocframe: + case Hexagon::V4_SL2_jumpr31: + case Hexagon::V4_SL2_jumpr31_f: + case Hexagon::V4_SL2_jumpr31_fnew: + case Hexagon::V4_SL2_jumpr31_t: + case Hexagon::V4_SL2_jumpr31_tnew: + case Hexagon::V4_SL2_return: + case Hexagon::V4_SL2_return_f: + case Hexagon::V4_SL2_return_fnew: + case Hexagon::V4_SL2_return_t: + case Hexagon::V4_SL2_return_tnew: + // no operands for these instructions + break; + case Hexagon::V4_SS2_allocframe: + // u 8-4{5_3} + operand = ((inst & 0x1f0) >> 4) << 3; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SL1_loadri_io: + // Rd 3-0, Rs 7-4, u 11-8{4_2} + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf00) >> 6; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SL1_loadrub_io: + // Rd 3-0, Rs 7-4, u 11-8 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0xf00) >> 8; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SL2_loadrb_io: + // Rd 3-0, Rs 7-4, u 10-8 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = (inst & 0x700) >> 8; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SL2_loadrh_io: + case Hexagon::V4_SL2_loadruh_io: + // Rd 3-0, Rs 7-4, u 10-8{3_1} + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x700) >> 8) << 1; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SL2_loadrd_sp: + // Rdd 2-0, u 7-3{5_3} + operand = getDRegFromSubinstEncoding(inst & 0x7); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x0f8) >> 3) << 3; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SL2_loadri_sp: + // Rd 3-0, u 8-4{5_2} + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + operand = ((inst & 0x1f0) >> 4) << 2; + Op = MCOperand::createImm(operand); + MI->addOperand(Op); + break; + case Hexagon::V4_SA1_addi: + // Rx 3-0 (x2), s7 10-4 + operand = getRegFromSubinstEncoding(inst & 0xf); + Op = MCOperand::createReg(operand); + MI->addOperand(Op); + MI->addOperand(Op); + operand = SignExtend64<7>((inst & 0x7f0) >> 4); + Op = 
MCOperand::createImm(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SA1_addrx:
+    // Rx 3-0 (x2), Rs 7-4
+    operand = getRegFromSubinstEncoding(inst & 0xf);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    MI->addOperand(Op);
+    operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SA1_and1:
+  case Hexagon::V4_SA1_dec:
+  case Hexagon::V4_SA1_inc:
+  case Hexagon::V4_SA1_sxtb:
+  case Hexagon::V4_SA1_sxth:
+  case Hexagon::V4_SA1_tfr:
+  case Hexagon::V4_SA1_zxtb:
+  case Hexagon::V4_SA1_zxth:
+    // Rd 3-0, Rs 7-4
+    operand = getRegFromSubinstEncoding(inst & 0xf);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SA1_addsp:
+    // Rd 3-0, u 9-4{6_2}
+    operand = getRegFromSubinstEncoding(inst & 0xf);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    operand = ((inst & 0x3f0) >> 4) << 2;
+    Op = MCOperand::createImm(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SA1_seti:
+    // Rd 3-0, u 9-4
+    operand = getRegFromSubinstEncoding(inst & 0xf);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    operand = (inst & 0x3f0) >> 4;
+    Op = MCOperand::createImm(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SA1_clrf:
+  case Hexagon::V4_SA1_clrfnew:
+  case Hexagon::V4_SA1_clrt:
+  case Hexagon::V4_SA1_clrtnew:
+  case Hexagon::V4_SA1_setin1:
+    // Rd 3-0
+    operand = getRegFromSubinstEncoding(inst & 0xf);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SA1_cmpeqi:
+    // Rs 7-4, u 1-0
+    operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    operand = inst & 0x3;
+    Op = MCOperand::createImm(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SA1_combine0i:
+  case Hexagon::V4_SA1_combine1i:
+  case Hexagon::V4_SA1_combine2i:
+  case Hexagon::V4_SA1_combine3i:
+    // Rdd 2-0, u 6-5
+    operand = getDRegFromSubinstEncoding(inst & 0x7);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    operand = (inst & 0x060) >> 5;
+    Op = MCOperand::createImm(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SA1_combinerz:
+  case Hexagon::V4_SA1_combinezr:
+    // Rdd 2-0, Rs 7-4
+    operand = getDRegFromSubinstEncoding(inst & 0x7);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SS1_storeb_io:
+    // Rs 7-4, u 11-8, Rt 3-0
+    operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    operand = (inst & 0xf00) >> 8;
+    Op = MCOperand::createImm(operand);
+    MI->addOperand(Op);
+    operand = getRegFromSubinstEncoding(inst & 0xf);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SS1_storew_io:
+    // Rs 7-4, u 11-8{4_2}, Rt 3-0
+    operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    operand = ((inst & 0xf00) >> 8) << 2;
+    Op = MCOperand::createImm(operand);
+    MI->addOperand(Op);
+    operand = getRegFromSubinstEncoding(inst & 0xf);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SS2_storebi0:
+  case Hexagon::V4_SS2_storebi1:
+    // Rs 7-4, u 3-0
+    operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    operand = inst & 0xf;
+    Op = MCOperand::createImm(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SS2_storewi0:
+  case Hexagon::V4_SS2_storewi1:
+    // Rs 7-4, u 3-0{4_2}
+    operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    operand = (inst & 0xf) << 2;
+    Op = MCOperand::createImm(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SS2_stored_sp:
+    // s 8-3{6_3}, Rtt 2-0
+    operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3);
+    Op = MCOperand::createImm(operand);
+    MI->addOperand(Op);
+    operand = getDRegFromSubinstEncoding(inst & 0x7);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SS2_storeh_io:
+    // Rs 7-4, u 10-8{3_1}, Rt 3-0
+    operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    operand = ((inst & 0x700) >> 8) << 1;
+    Op = MCOperand::createImm(operand);
+    MI->addOperand(Op);
+    operand = getRegFromSubinstEncoding(inst & 0xf);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    break;
+  case Hexagon::V4_SS2_storew_sp:
+    // u 8-4{5_2}, Rd 3-0
+    operand = ((inst & 0x1f0) >> 4) << 2;
+    Op = MCOperand::createImm(operand);
+    MI->addOperand(Op);
+    operand = getRegFromSubinstEncoding(inst & 0xf);
+    Op = MCOperand::createReg(operand);
+    MI->addOperand(Op);
+    break;
+  default:
+    // Don't crash on an invalid subinstruction.
+    // llvm_unreachable("Invalid subinstruction in duplex instruction");
+    break;
+  }
+}
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index dfe79f9ff7b0..6e2ecaf57e49 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -76,4 +76,11 @@ namespace llvm {
// Maximum number of words and instructions in a packet.
#define HEXAGON_PACKET_SIZE 4
+// Minimum number of instructions in an end-loop packet.
+#define HEXAGON_PACKET_INNER_SIZE 2
+#define HEXAGON_PACKET_OUTER_SIZE 3
+// Maximum number of instructions in a packet before shuffling,
+// including a compound one or a duplex or an extender.
+#define HEXAGON_PRESHUFFLE_PACKET_SIZE (HEXAGON_PACKET_SIZE + 3)
+
#endif
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index e9491baf29ef..05728d2b627e 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "HexagonTargetMachine.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -78,14 +79,14 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
    O << MO.getImm();
    return;
  case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol();
+    MO.getMBB()->getSymbol()->print(O, MAI);
    return;
  case MachineOperand::MO_ConstantPoolIndex:
-    O << *GetCPISymbol(MO.getIndex());
+    GetCPISymbol(MO.getIndex())->print(O, MAI);
    return;
  case MachineOperand::MO_GlobalAddress:
    // Computing the address of a global symbol, not calling it.
-    O << *getSymbol(MO.getGlobal());
+    getSymbol(MO.getGlobal())->print(O, MAI);
    printOffset(MO.getOffset(), O);
    return;
  }
@@ -177,49 +178,40 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
/// the current output stream.
/// void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { - if (MI->isBundle()) { - std::vector<MachineInstr const *> BundleMIs; + MCInst MCB; + MCB.setOpcode(Hexagon::BUNDLE); + MCB.addOperand(MCOperand::createImm(0)); - const MachineBasicBlock *MBB = MI->getParent(); + if (MI->isBundle()) { + const MachineBasicBlock* MBB = MI->getParent(); MachineBasicBlock::const_instr_iterator MII = MI; - ++MII; - unsigned int IgnoreCount = 0; - while (MII != MBB->end() && MII->isInsideBundle()) { - const MachineInstr *MInst = MII; - if (MInst->getOpcode() == TargetOpcode::DBG_VALUE || - MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) { - IgnoreCount++; - ++MII; - continue; + unsigned IgnoreCount = 0; + + for (++MII; MII != MBB->end() && MII->isInsideBundle(); ++MII) { + if (MII->getOpcode() == TargetOpcode::DBG_VALUE || + MII->getOpcode() == TargetOpcode::IMPLICIT_DEF) + ++IgnoreCount; + else { + HexagonLowerToMC(MII, MCB, *this); } - // BundleMIs.push_back(&*MII); - BundleMIs.push_back(MInst); - ++MII; - } - unsigned Size = BundleMIs.size(); - assert((Size + IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!"); - for (unsigned Index = 0; Index < Size; Index++) { - MCInst MCI; - - HexagonLowerToMC(BundleMIs[Index], MCI, *this); - HexagonMCInstrInfo::AppendImplicitOperands(MCI); - HexagonMCInstrInfo::setPacketBegin(MCI, Index == 0); - HexagonMCInstrInfo::setPacketEnd(MCI, Index == (Size - 1)); - EmitToStreamer(*OutStreamer, MCI); } } else { - MCInst MCI; - HexagonLowerToMC(MI, MCI, *this); - HexagonMCInstrInfo::AppendImplicitOperands(MCI); - if (MI->getOpcode() == Hexagon::ENDLOOP0) { - HexagonMCInstrInfo::setPacketBegin(MCI, true); - HexagonMCInstrInfo::setPacketEnd(MCI, true); - } - EmitToStreamer(*OutStreamer, MCI); + HexagonLowerToMC(MI, MCB, *this); + HexagonMCInstrInfo::padEndloop(MCB); } - - return; + // Examine the packet and try to find instructions that can be converted + // to compounds. + HexagonMCInstrInfo::tryCompound(*Subtarget->getInstrInfo(), + OutStreamer->getContext(), MCB); + // Examine the packet and convert pairs of instructions to duplex + // instructions when possible. + SmallVector<DuplexCandidate, 8> possibleDuplexes; + possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties( + *Subtarget->getInstrInfo(), MCB); + HexagonMCShuffle(*Subtarget->getInstrInfo(), *Subtarget, + OutStreamer->getContext(), MCB, possibleDuplexes); + EmitToStreamer(*OutStreamer, MCB); } extern "C" void LLVMInitializeHexagonAsmPrinter() { diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 0885a794a7b4..868f87e18413 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -201,17 +201,17 @@ namespace { break; } // Check individual operands. - for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) { + for (const MachineOperand &MO : MI->operands()) { // While the presence of a frame index does not prove that a stack // frame will be required, all frame indexes should be within alloc- // frame/deallocframe. Otherwise, the code that translates a frame // index into an offset would have to be aware of the placement of // the frame creation/destruction instructions. - if (Mo->isFI()) + if (MO.isFI()) return true; - if (!Mo->isReg()) + if (!MO.isReg()) continue; - unsigned R = Mo->getReg(); + unsigned R = MO.getReg(); // Virtual registers will need scavenging, which then may require // a stack slot. 
if (TargetRegisterInfo::isVirtualRegister(R)) diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index ed5676c1fbb6..74d92aef25ac 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2370,7 +2370,8 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { /// isLegalAddressingMode - Return true if the addressing mode represented by /// AM is legal for this target, for a load/store of the specified type. bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // Allows a signed-extended 11-bit immediate field. if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) return false; diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 584c2c57c7ca..b80e8477eb7b 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -198,7 +198,8 @@ bool isPositiveHalfWord(SDNode *N); /// The type may be VoidTy, in which case only return true if the addressing /// mode is legal for a load/store of any legal type. /// TODO: Handle pre/postinc as well. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index 36a7e9f642c6..44bab292f32c 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -66,10 +66,8 @@ def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd) class OpcodeHexagon { field bits<32> Inst = ?; // Default to an invalid insn. bits<4> IClass = 0; // ICLASS - bits<2> IParse = 0; // Parse bits. 
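+  // (Inference, not stated in the patch: the parse bits in Inst{15-14}
+  // are now composed at the MC level when a packet is finalized, which
+  // is why IParse is dropped from the per-instruction encoding.)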
let Inst{31-28} = IClass; - let Inst{15-14} = IParse; bits<1> zero = 0; } diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index 7f7b2c96dba7..db83ef6bc474 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -146,6 +146,11 @@ class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []> : InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123, TypePREFIX>, OpcodeHexagon; +class SUBInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], + string cstr = ""> + : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypeDUPLEX>, + OpcodeHexagon; + class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = ""> : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>, diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 49b4517698d5..e566a97789a9 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -779,10 +779,9 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return false; } -MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - int FI) const { +MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FI) const { // Hexagon_TODO: Implement. return nullptr; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 0239cabe9e52..a7ae65e4eb9c 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -114,10 +114,12 @@ public: MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const override; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const override { return nullptr; } diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 8b667c645156..65b0f4974367 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -4263,3 +4263,7 @@ def J4_jumpsetr: CJInst < let Inst{19-16} = Rs; let Inst{7-1} = r9_2{8-2}; } + +// Duplex instructions +//===----------------------------------------------------------------------===// +include "HexagonIsetDx.td" diff --git a/lib/Target/Hexagon/HexagonIsetDx.td b/lib/Target/Hexagon/HexagonIsetDx.td new file mode 100644 index 000000000000..0ca95e999859 --- /dev/null +++ b/lib/Target/Hexagon/HexagonIsetDx.td @@ -0,0 +1,728 @@ +//=- HexagonIsetDx.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon duplex instructions. +// +//===----------------------------------------------------------------------===// + +// SA1_combine1i: Combines. 
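+// A note on the encodings in this file: a sub-instruction occupies only
+// the low 13 bits of its slot, so every `let Inst{...}` below stays
+// within Inst{12-0}; operand fields (for the def that follows, Rdd in
+// bits 2-0 and u2 in bits 6-5) are packed around the fixed opcode bits.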
+let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine1i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#1, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b01; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SL2_jumpr31_f: Indirect conditional jump if false. +// SL2_jumpr31_f -> SL2_jumpr31_fnew +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_f: SUBInst < + (outs ), + (ins ), + "if (!p0) jumpr r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b101; + } + +// SL2_deallocframe: Deallocate stack frame. +let Defs = [R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in +def V4_SL2_deallocframe: SUBInst < + (outs ), + (ins ), + "deallocframe"> { + let Inst{12-6} = 0b1111100; + let Inst{2} = 0b0; + } + +// SL2_return_f: Deallocate stack frame and return. +// SL2_return_f -> SL2_return_fnew +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_f: SUBInst < + (outs ), + (ins ), + "if (!p0) dealloc_return"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b101; + } + +// SA1_combine3i: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine3i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#3, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b11; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SS2_storebi0: Store byte. +let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in +def V4_SS2_storebi0: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_0Imm:$u4_0), + "memb($Rs + #$u4_0)=#0"> { + bits<4> Rs; + bits<4> u4_0; + + let Inst{12-8} = 0b10010; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_0; + } + +// SA1_clrtnew: Clear if true. +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrtnew: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (p0.new) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b100; + let Inst{3-0} = Rd; + } + +// SL2_loadruh_io: Load half. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadruh_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u3_1Imm:$u3_1), + "$Rd = memuh($Rs + #$u3_1)"> { + bits<4> Rd; + bits<4> Rs; + bits<4> u3_1; + + let Inst{12-11} = 0b01; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_1{3-1}; + } + +// SL2_jumpr31_tnew: Indirect conditional jump if true. +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_tnew: SUBInst < + (outs ), + (ins ), + "if (p0.new) jumpr:nt r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b110; + } + +// SA1_addi: Add. 
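+// The def below is constant-extendable (isExtendable = 1, opExtentBits
+// = 7): if the signed 7-bit immediate does not fit, the assembler pairs
+// the sub-instruction with an immediate-extender word in the packet.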
+let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 1, opExtentBits = 7, opExtendable = 2 in +def V4_SA1_addi: SUBInst < + (outs IntRegs:$Rx), + (ins IntRegs:$_src_, s7Ext:$s7), + "$Rx = add($_src_, #$s7)" , + [] , + "$_src_ = $Rx"> { + bits<4> Rx; + bits<7> s7; + + let Inst{12-11} = 0b00; + let Inst{3-0} = Rx; + let Inst{10-4} = s7; + } + +// SL1_loadrub_io: Load byte. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL1_loadrub_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u4_0Imm:$u4_0), + "$Rd = memub($Rs + #$u4_0)"> { + bits<4> Rd; + bits<4> Rs; + bits<4> u4_0; + + let Inst{12} = 0b1; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_0; + } + +// SL1_loadri_io: Load word. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL1_loadri_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u4_2Imm:$u4_2), + "$Rd = memw($Rs + #$u4_2)"> { + bits<4> Rd; + bits<4> Rs; + bits<6> u4_2; + + let Inst{12} = 0b0; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_2{5-2}; + } + +// SA1_cmpeqi: Compareimmed. +let Defs = [P0], isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_cmpeqi: SUBInst < + (outs ), + (ins IntRegs:$Rs, u2Imm:$u2), + "p0 = cmp.eq($Rs, #$u2)"> { + bits<4> Rs; + bits<2> u2; + + let Inst{12-8} = 0b11001; + let Inst{7-4} = Rs; + let Inst{1-0} = u2; + } + +// SA1_combinerz: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combinerz: SUBInst < + (outs DoubleRegs:$Rdd), + (ins IntRegs:$Rs), + "$Rdd = combine($Rs, #0)"> { + bits<3> Rdd; + bits<4> Rs; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b1; + let Inst{3} = 0b1; + let Inst{2-0} = Rdd; + let Inst{7-4} = Rs; + } + +// SL2_return_t: Deallocate stack frame and return. +// SL2_return_t -> SL2_return_tnew +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_t: SUBInst < + (outs ), + (ins ), + "if (p0) dealloc_return"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b100; + } + +// SS2_allocframe: Allocate stack frame. +let Defs = [R29, R30], Uses = [R30, R31, R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in +def V4_SS2_allocframe: SUBInst < + (outs ), + (ins u5_3Imm:$u5_3), + "allocframe(#$u5_3)"> { + bits<8> u5_3; + + let Inst{12-9} = 0b1110; + let Inst{8-4} = u5_3{7-3}; + } + +// SS2_storeh_io: Store half. +let isCodeGenOnly = 1, mayStore = 1, accessSize = HalfWordAccess in +def V4_SS2_storeh_io: SUBInst < + (outs ), + (ins IntRegs:$Rs, u3_1Imm:$u3_1, IntRegs:$Rt), + "memh($Rs + #$u3_1) = $Rt"> { + bits<4> Rs; + bits<4> u3_1; + bits<4> Rt; + + let Inst{12-11} = 0b00; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_1{3-1}; + let Inst{3-0} = Rt; + } + +// SS2_storewi0: Store word. +let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS2_storewi0: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_2Imm:$u4_2), + "memw($Rs + #$u4_2)=#0"> { + bits<4> Rs; + bits<6> u4_2; + + let Inst{12-8} = 0b10000; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_2{5-2}; + } + +// SS2_storewi1: Store word. 
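+// Operand types named uN_SImm are N-bit fields scaled by 2^S; u4_2Imm
+// below holds a word-aligned offset, which is why the disassembler
+// decodes it as `(inst & 0xf) << 2` in AddSubinstOperands() above.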
+let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS2_storewi1: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_2Imm:$u4_2), + "memw($Rs + #$u4_2)=#1"> { + bits<4> Rs; + bits<6> u4_2; + + let Inst{12-8} = 0b10001; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_2{5-2}; + } + +// SL2_jumpr31: Indirect conditional jump if true. +let Defs = [PC], Uses = [R31], isCodeGenOnly = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31: SUBInst < + (outs ), + (ins ), + "jumpr r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2} = 0b0; + } + +// SA1_combinezr: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combinezr: SUBInst < + (outs DoubleRegs:$Rdd), + (ins IntRegs:$Rs), + "$Rdd = combine(#0, $Rs)"> { + bits<3> Rdd; + bits<4> Rs; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b1; + let Inst{3} = 0b0; + let Inst{2-0} = Rdd; + let Inst{7-4} = Rs; + } + +// SL2_loadrh_io: Load half. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadrh_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u3_1Imm:$u3_1), + "$Rd = memh($Rs + #$u3_1)"> { + bits<4> Rd; + bits<4> Rs; + bits<4> u3_1; + + let Inst{12-11} = 0b00; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_1{3-1}; + } + +// SA1_addrx: Add. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_addrx: SUBInst < + (outs IntRegs:$Rx), + (ins IntRegs:$_src_, IntRegs:$Rs), + "$Rx = add($_src_, $Rs)" , + [] , + "$_src_ = $Rx"> { + bits<4> Rx; + bits<4> Rs; + + let Inst{12-8} = 0b11000; + let Inst{3-0} = Rx; + let Inst{7-4} = Rs; + } + +// SA1_setin1: Set to -1. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_setin1: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "$Rd = #-1"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6} = 0b0; + let Inst{3-0} = Rd; + } + +// SA1_sxth: Sxth. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_sxth: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = sxth($Rs)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10100; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_combine0i: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine0i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#0, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b00; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SA1_combine2i: Combines. +let isCodeGenOnly = 1, hasSideEffects = 0 in +def V4_SA1_combine2i: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u2Imm:$u2), + "$Rdd = combine(#2, #$u2)"> { + bits<3> Rdd; + bits<2> u2; + + let Inst{12-10} = 0b111; + let Inst{8} = 0b0; + let Inst{4-3} = 0b10; + let Inst{2-0} = Rdd; + let Inst{6-5} = u2; + } + +// SA1_sxtb: Sxtb. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_sxtb: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = sxtb($Rs)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10101; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_clrf: Clear if false. 
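+// Predicated sub-instructions like the one below read P0 implicitly
+// (Uses = [P0]); the "->" comment that follows appears to record the
+// .new form the MC compounding code may promote it to.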
+// SA1_clrf -> SA1_clrfnew +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrf: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (!p0) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b111; + let Inst{3-0} = Rd; + } + +// SL2_loadrb_io: Load byte. +let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadrb_io: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs, u3_0Imm:$u3_0), + "$Rd = memb($Rs + #$u3_0)"> { + bits<4> Rd; + bits<4> Rs; + bits<3> u3_0; + + let Inst{12-11} = 0b10; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + let Inst{10-8} = u3_0; + } + +// SA1_tfr: Tfr. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_tfr: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = $Rs"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10000; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SL2_loadrd_sp: Load dword. +let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in +def V4_SL2_loadrd_sp: SUBInst < + (outs DoubleRegs:$Rdd), + (ins u5_3Imm:$u5_3), + "$Rdd = memd(r29 + #$u5_3)"> { + bits<3> Rdd; + bits<8> u5_3; + + let Inst{12-8} = 0b11110; + let Inst{2-0} = Rdd; + let Inst{7-3} = u5_3{7-3}; + } + +// SA1_and1: And #1. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_and1: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = and($Rs, #1)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10010; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SS2_storebi1: Store byte. +let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in +def V4_SS2_storebi1: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_0Imm:$u4_0), + "memb($Rs + #$u4_0)=#1"> { + bits<4> Rs; + bits<4> u4_0; + + let Inst{12-8} = 0b10011; + let Inst{7-4} = Rs; + let Inst{3-0} = u4_0; + } + +// SA1_inc: Inc. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_inc: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = add($Rs, #1)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10001; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SS2_stored_sp: Store dword. +let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in +def V4_SS2_stored_sp: SUBInst < + (outs ), + (ins s6_3Imm:$s6_3, DoubleRegs:$Rtt), + "memd(r29 + #$s6_3) = $Rtt"> { + bits<9> s6_3; + bits<3> Rtt; + + let Inst{12-9} = 0b0101; + let Inst{8-3} = s6_3{8-3}; + let Inst{2-0} = Rtt; + } + +// SS2_storew_sp: Store word. +let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS2_storew_sp: SUBInst < + (outs ), + (ins u5_2Imm:$u5_2, IntRegs:$Rt), + "memw(r29 + #$u5_2) = $Rt"> { + bits<7> u5_2; + bits<4> Rt; + + let Inst{12-9} = 0b0100; + let Inst{8-4} = u5_2{6-2}; + let Inst{3-0} = Rt; + } + +// SL2_jumpr31_fnew: Indirect conditional jump if false. +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_fnew: SUBInst < + (outs ), + (ins ), + "if (!p0.new) jumpr:nt r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b111; + } + +// SA1_clrt: Clear if true. 
+// SA1_clrt -> SA1_clrtnew +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrt: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (p0) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b110; + let Inst{3-0} = Rd; + } + +// SL2_return: Deallocate stack frame and return. +let Defs = [PC, R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return: SUBInst < + (outs ), + (ins ), + "dealloc_return"> { + let Inst{12-6} = 0b1111101; + let Inst{2} = 0b0; + } + +// SA1_dec: Dec. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_dec: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = add($Rs,#-1)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10011; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_seti: Set immed. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 0, opExtentBits = 6, opExtendable = 1 in +def V4_SA1_seti: SUBInst < + (outs IntRegs:$Rd), + (ins u6Ext:$u6), + "$Rd = #$u6"> { + bits<4> Rd; + bits<6> u6; + + let Inst{12-10} = 0b010; + let Inst{3-0} = Rd; + let Inst{9-4} = u6; + } + +// SL2_jumpr31_t: Indirect conditional jump if true. +// SL2_jumpr31_t -> SL2_jumpr31_tnew +let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in +def V4_SL2_jumpr31_t: SUBInst < + (outs ), + (ins ), + "if (p0) jumpr r31"> { + let Inst{12-6} = 0b1111111; + let Inst{2-0} = 0b100; + } + +// SA1_clrfnew: Clear if false. +let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_clrfnew: SUBInst < + (outs IntRegs:$Rd), + (ins ), + "if (!p0.new) $Rd = #0"> { + bits<4> Rd; + + let Inst{12-9} = 0b1101; + let Inst{6-4} = 0b101; + let Inst{3-0} = Rd; + } + +// SS1_storew_io: Store word. +let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in +def V4_SS1_storew_io: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_2Imm:$u4_2, IntRegs:$Rt), + "memw($Rs + #$u4_2) = $Rt"> { + bits<4> Rs; + bits<6> u4_2; + bits<4> Rt; + + let Inst{12} = 0b0; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_2{5-2}; + let Inst{3-0} = Rt; + } + +// SA1_zxtb: Zxtb. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_zxtb: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = and($Rs, #255)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10111; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + +// SA1_addsp: Add. +let Uses = [R29], isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_addsp: SUBInst < + (outs IntRegs:$Rd), + (ins u6_2Imm:$u6_2), + "$Rd = add(r29, #$u6_2)"> { + bits<4> Rd; + bits<8> u6_2; + + let Inst{12-10} = 0b011; + let Inst{3-0} = Rd; + let Inst{9-4} = u6_2{7-2}; + } + +// SL2_loadri_sp: Load word. +let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in +def V4_SL2_loadri_sp: SUBInst < + (outs IntRegs:$Rd), + (ins u5_2Imm:$u5_2), + "$Rd = memw(r29 + #$u5_2)"> { + bits<4> Rd; + bits<7> u5_2; + + let Inst{12-9} = 0b1110; + let Inst{3-0} = Rd; + let Inst{8-4} = u5_2{6-2}; + } + +// SS1_storeb_io: Store byte. 
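+// In the S1 store group a single bit selects the access size: Inst{12}
+// is 1 for the byte store below and 0 for SS1_storew_io, matching the
+// V4_SS1_*_BITS/MASK pair (0x1000) tested by the disassembler.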
+let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in +def V4_SS1_storeb_io: SUBInst < + (outs ), + (ins IntRegs:$Rs, u4_0Imm:$u4_0, IntRegs:$Rt), + "memb($Rs + #$u4_0) = $Rt"> { + bits<4> Rs; + bits<4> u4_0; + bits<4> Rt; + + let Inst{12} = 0b1; + let Inst{7-4} = Rs; + let Inst{11-8} = u4_0; + let Inst{3-0} = Rt; + } + +// SL2_return_tnew: Deallocate stack frame and return. +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_tnew: SUBInst < + (outs ), + (ins ), + "if (p0.new) dealloc_return:nt"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b110; + } + +// SL2_return_fnew: Deallocate stack frame and return. +let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in +def V4_SL2_return_fnew: SUBInst < + (outs ), + (ins ), + "if (!p0.new) dealloc_return:nt"> { + let Inst{12-6} = 0b1111101; + let Inst{2-0} = 0b111; + } + +// SA1_zxth: Zxth. +let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +def V4_SA1_zxth: SUBInst < + (outs IntRegs:$Rd), + (ins IntRegs:$Rs), + "$Rd = zxth($Rs)"> { + bits<4> Rd; + bits<4> Rs; + + let Inst{12-8} = 0b10110; + let Inst{3-0} = Rd; + let Inst{7-4} = Rs; + } + diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp index 535d1f91b493..75189b696ea2 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -15,9 +15,12 @@ #include "Hexagon.h" #include "HexagonAsmPrinter.h" #include "HexagonMachineFunctionInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" + #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -28,19 +31,30 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol, MCContext &MC = Printer.OutContext; const MCExpr *ME; - ME = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, MC); + ME = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, MC); if (!MO.isJTI() && MO.getOffset()) - ME = MCBinaryExpr::CreateAdd(ME, MCConstantExpr::Create(MO.getOffset(), MC), + ME = MCBinaryExpr::createAdd(ME, MCConstantExpr::create(MO.getOffset(), MC), MC); return (MCOperand::createExpr(ME)); } // Create an MCInst from a MachineInstr -void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI, +void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB, HexagonAsmPrinter& AP) { - MCI.setOpcode(MI->getOpcode()); + if(MI->getOpcode() == Hexagon::ENDLOOP0){ + HexagonMCInstrInfo::setInnerLoop(MCB); + return; + } + if(MI->getOpcode() == Hexagon::ENDLOOP1){ + HexagonMCInstrInfo::setOuterLoop(MCB); + return; + } + MCInst* MCI = new (AP.OutContext) MCInst; + MCI->setOpcode(MI->getOpcode()); + assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) && + "MCI opcode should have been set on construction"); for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { const MachineOperand &MO = MI->getOperand(i); @@ -67,7 +81,7 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI, break; case MachineOperand::MO_MachineBasicBlock: MCO = MCOperand::createExpr - (MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), + 
(MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), AP.OutContext)); break; case MachineOperand::MO_GlobalAddress: @@ -88,6 +102,7 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCI, break; } - MCI.addOperand(MCO); + MCI->addOperand(MCO); } + MCB.addOperand(MCOperand::createInst(MCI)); } diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td index b7f364ef0751..be8204b7de53 100644 --- a/lib/Target/Hexagon/HexagonOperands.td +++ b/lib/Target/Hexagon/HexagonOperands.td @@ -27,6 +27,7 @@ let PrintMethod = "printImmOperand" in { def s8Imm : Operand<i32>; def s8Imm64 : Operand<i64>; def s6Imm : Operand<i32>; + def s6_3Imm : Operand<i32>; def s4Imm : Operand<i32>; def s4_0Imm : Operand<i32>; def s4_1Imm : Operand<i32>; @@ -51,8 +52,14 @@ let PrintMethod = "printImmOperand" in { def u6_2Imm : Operand<i32>; def u6_3Imm : Operand<i32>; def u5Imm : Operand<i32>; + def u5_2Imm : Operand<i32>; + def u5_3Imm : Operand<i32>; def u4Imm : Operand<i32>; + def u4_0Imm : Operand<i32>; + def u4_2Imm : Operand<i32>; def u3Imm : Operand<i32>; + def u3_0Imm : Operand<i32>; + def u3_1Imm : Operand<i32>; def u2Imm : Operand<i32>; def u1Imm : Operand<i32>; def n8Imm : Operand<i32>; @@ -444,6 +451,7 @@ let PrintMethod = "printExtOperand" in { def s10Ext : Operand<i32>; def s9Ext : Operand<i32>; def s8Ext : Operand<i32>; + def s7Ext : Operand<i32>; def s6Ext : Operand<i32>; def s11_0Ext : Operand<i32>; def s11_1Ext : Operand<i32>; diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt index 4c987ed32a64..6253686b4993 100644 --- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt @@ -4,8 +4,12 @@ add_llvm_library(LLVMHexagonDesc HexagonInstPrinter.cpp HexagonMCAsmInfo.cpp HexagonMCCodeEmitter.cpp + HexagonMCCompound.cpp + HexagonMCDuplexInfo.cpp HexagonMCInstrInfo.cpp + HexagonMCShuffler.cpp HexagonMCTargetDesc.cpp + HexagonShuffler.cpp ) add_dependencies(LLVMHexagonDesc HexagonCommonTableGen) diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 155aa9ef9557..76894840153d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -7,19 +7,150 @@ // //===----------------------------------------------------------------------===// +#include "Hexagon.h" +#include "HexagonFixupKinds.h" #include "HexagonMCTargetDesc.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; +using namespace Hexagon; namespace { class HexagonAsmBackend : public MCAsmBackend { + uint8_t OSABI; + StringRef CPU; + mutable uint64_t relaxedCnt; + std::unique_ptr <MCInstrInfo> MCII; + std::unique_ptr <MCInst *> RelaxTarget; public: - HexagonAsmBackend(Target const & /*T*/) {} + HexagonAsmBackend(Target const &T, uint8_t OSABI, StringRef CPU) : + OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *){} - unsigned getNumFixupKinds() const override { return 0; } + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + return createHexagonELFObjectWriter(OS, OSABI, CPU); + } + + unsigned getNumFixupKinds() const override { + return 
Hexagon::NumTargetFixupKinds; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { + const static MCFixupKindInfo Infos[Hexagon::NumTargetFixupKinds] = { + // This table *must* be in same the order of fixup_* kinds in + // HexagonFixupKinds.h. + // + // namei offset bits flags + {"fixup_Hexagon_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B15_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B7_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_LO16", 0, 32, 0}, + {"fixup_Hexagon_HI16", 0, 32, 0}, + {"fixup_Hexagon_32", 0, 32, 0}, + {"fixup_Hexagon_16", 0, 32, 0}, + {"fixup_Hexagon_8", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_0", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_1", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_2", 0, 32, 0}, + {"fixup_Hexagon_GPREL16_3", 0, 32, 0}, + {"fixup_Hexagon_HL16", 0, 32, 0}, + {"fixup_Hexagon_B13_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B9_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B32_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_B22_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B15_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B13_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B9_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_B7_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_16_X", 0, 32, 0}, + {"fixup_Hexagon_12_X", 0, 32, 0}, + {"fixup_Hexagon_11_X", 0, 32, 0}, + {"fixup_Hexagon_10_X", 0, 32, 0}, + {"fixup_Hexagon_9_X", 0, 32, 0}, + {"fixup_Hexagon_8_X", 0, 32, 0}, + {"fixup_Hexagon_7_X", 0, 32, 0}, + {"fixup_Hexagon_6_X", 0, 32, 0}, + {"fixup_Hexagon_32_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_COPY", 0, 32, 0}, + {"fixup_Hexagon_GLOB_DAT", 0, 32, 0}, + {"fixup_Hexagon_JMP_SLOT", 0, 32, 0}, + {"fixup_Hexagon_RELATIVE", 0, 32, 0}, + {"fixup_Hexagon_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_GOTREL_LO16", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_HI16", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_32", 0, 32, 0}, + {"fixup_Hexagon_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_DTPMOD_32", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_LO16", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_HI16", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_32", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_16", 0, 32, 0}, + {"fixup_Hexagon_GD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_LD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_GD_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_IE_LO16", 0, 32, 0}, + {"fixup_Hexagon_IE_HI16", 0, 32, 0}, + {"fixup_Hexagon_IE_32", 0, 32, 0}, + {"fixup_Hexagon_IE_16", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_LO16", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_HI16", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_32", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_16", 0, 32, 0}, + {"fixup_Hexagon_TPREL_LO16", 0, 32, 0}, + {"fixup_Hexagon_TPREL_HI16", 0, 32, 0}, + {"fixup_Hexagon_TPREL_32", 0, 32, 0}, + {"fixup_Hexagon_TPREL_16", 0, 32, 0}, + {"fixup_Hexagon_6_PCREL_X", 0, 32, 
MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_Hexagon_GOTREL_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_16_X", 0, 32, 0}, + {"fixup_Hexagon_GOTREL_11_X", 0, 32, 0}, + {"fixup_Hexagon_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_16_X", 0, 32, 0}, + {"fixup_Hexagon_DTPREL_11_X", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_GD_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_LD_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_IE_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_IE_16_X", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_16_X", 0, 32, 0}, + {"fixup_Hexagon_IE_GOT_11_X", 0, 32, 0}, + {"fixup_Hexagon_TPREL_32_6_X", 0, 32, 0}, + {"fixup_Hexagon_TPREL_16_X", 0, 32, 0}, + {"fixup_Hexagon_TPREL_11_X", 0, 32, 0}}; + + if (Kind < FirstTargetFixupKind) { + return MCAsmBackend::getFixupKindInfo(Kind); + } + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } void applyFixup(MCFixup const & /*Fixup*/, char * /*Data*/, unsigned /*DataSize*/, uint64_t /*Value*/, @@ -27,14 +158,119 @@ public: return; } - bool mayNeedRelaxation(MCInst const & /*Inst*/) const override { + bool isInstRelaxable(MCInst const &HMI) const { + const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(*MCII, HMI); + bool Relaxable = false; + // Branches and loop-setup insns are handled as necessary by relaxation. + if (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeJ || + (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeNV && + MCID.isBranch()) || + (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeCR && + HMI.getOpcode() != Hexagon::C4_addipc)) + if (HexagonMCInstrInfo::isExtendable(*MCII, HMI)) + Relaxable = true; + + return Relaxable; + } + + /// MayNeedRelaxation - Check whether the given instruction may need + /// relaxation. + /// + /// \param Inst - The instruction to test. + bool mayNeedRelaxation(MCInst const &Inst) const override { + assert(HexagonMCInstrInfo::isBundle(Inst)); + bool PreviousIsExtender = false; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(Inst)) { + auto const &Inst = *I.getInst(); + if (!PreviousIsExtender) { + if (isInstRelaxable(Inst)) + return true; + } + PreviousIsExtender = HexagonMCInstrInfo::isImmext(Inst); + } return false; } - bool fixupNeedsRelaxation(MCFixup const & /*Fixup*/, uint64_t /*Value*/, - MCRelaxableFragment const * /*DF*/, - MCAsmLayout const & /*Layout*/) const override { - llvm_unreachable("fixupNeedsRelaxation() unimplemented"); + /// fixupNeedsRelaxation - Target specific predicate for whether a given + /// fixup requires the associated instruction to be relaxed. + bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + MCInst const &MCB = DF->getInst(); + assert(HexagonMCInstrInfo::isBundle(MCB)); + + *RelaxTarget = nullptr; + MCInst &MCI = const_cast<MCInst &>(HexagonMCInstrInfo::instruction( + MCB, Fixup.getOffset() / HEXAGON_INSTR_SIZE)); + // If we cannot resolve the fixup value, it requires relaxation. 
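[Editorial sketch, not part of the patch: the resolved-value handling that follows decides relaxation by testing the fixup value against the reach of each branch encoding. An N-bit signed branch field scaled by 4-byte instruction words reaches roughly plus or minus 2^(N+1) bytes, which is exactly the maxValue = 1 << (N+1) pattern in the switch below.]

  #include <cstdint>

  // Sketch of the range test below: Value is the PC-relative byte
  // displacement, FieldBits the branch immediate width (7/9/15/22).
  static bool isFarAway(int64_t Value, unsigned FieldBits) {
    int64_t MaxValue = int64_t(1) << (FieldBits + 1);
    return -MaxValue > Value || Value > MaxValue - 1;
  }

  // isFarAway(V, 22) mirrors the fixup_Hexagon_B22_PCREL case
  // (maxValue = 1 << 23); a far-away target triggers relaxation only
  // while the packet still has room for an immediate extender.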
+ if (!Resolved) { + switch ((unsigned)Fixup.getKind()) { + case fixup_Hexagon_B22_PCREL: + // GetFixupCount assumes B22 won't relax + // Fallthrough + default: + return false; + break; + case fixup_Hexagon_B13_PCREL: + case fixup_Hexagon_B15_PCREL: + case fixup_Hexagon_B9_PCREL: + case fixup_Hexagon_B7_PCREL: { + if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) { + ++relaxedCnt; + *RelaxTarget = &MCI; + return true; + } else { + return false; + } + break; + } + } + } + bool Relaxable = isInstRelaxable(MCI); + if (Relaxable == false) + return false; + + MCFixupKind Kind = Fixup.getKind(); + int64_t sValue = Value; + int64_t maxValue; + + switch ((unsigned)Kind) { + case fixup_Hexagon_B7_PCREL: + maxValue = 1 << 8; + break; + case fixup_Hexagon_B9_PCREL: + maxValue = 1 << 10; + break; + case fixup_Hexagon_B15_PCREL: + maxValue = 1 << 16; + break; + case fixup_Hexagon_B22_PCREL: + maxValue = 1 << 23; + break; + default: + maxValue = INT64_MAX; + break; + } + + bool isFarAway = -maxValue > sValue || sValue > maxValue - 1; + + if (isFarAway) { + if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) { + ++relaxedCnt; + *RelaxTarget = &MCI; + return true; + } + } + + return false; + } + + /// Simple predicate for targets where !Resolved implies requiring relaxation + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced"); } void relaxInstruction(MCInst const & /*Inst*/, @@ -49,26 +285,11 @@ public: }; } // end anonymous namespace -namespace { -class ELFHexagonAsmBackend : public HexagonAsmBackend { - uint8_t OSABI; - -public: - ELFHexagonAsmBackend(Target const &T, uint8_t OSABI) - : HexagonAsmBackend(T), OSABI(OSABI) {} - - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { - StringRef CPU("HexagonV4"); - return createHexagonELFObjectWriter(OS, OSABI, CPU); - } -}; -} // end anonymous namespace - namespace llvm { MCAsmBackend *createHexagonAsmBackend(Target const &T, MCRegisterInfo const & /*MRI*/, - StringRef TT, StringRef /*CPU*/) { + StringRef TT, StringRef CPU) { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); - return new ELFHexagonAsmBackend(T, OSABI); + return new HexagonAsmBackend(T, OSABI, CPU); } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 6a72f205e9d3..f4d162ccf6a8 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -43,6 +43,7 @@ namespace HexagonII { TypeXTYPE = 8, TypeMEMOP = 9, TypeNV = 10, + TypeDUPLEX = 11, TypePREFIX = 30, // Such as extenders. TypeENDLOOP = 31 // Such as end of a HW loop. }; @@ -190,7 +191,26 @@ namespace HexagonII { MO_GPREL }; - enum class InstParseBits : uint32_t { + // Hexagon Sub-instruction classes. + enum SubInstructionGroup { + HSIG_None = 0, + HSIG_L1, + HSIG_L2, + HSIG_S1, + HSIG_S2, + HSIG_A, + HSIG_Compound + }; + + // Hexagon Compound classes. 
+ enum CompoundGroup { + HCG_None = 0, + HCG_A, + HCG_B, + HCG_C + }; + + enum InstParseBits { INST_PARSE_MASK = 0x0000c000, INST_PARSE_PACKET_END = 0x0000c000, INST_PARSE_LOOP_END = 0x00008000, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index fde935b2758b..843072302b21 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "Hexagon.h" +#include "MCTargetDesc/HexagonFixupKinds.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/Support/Debug.h" @@ -40,17 +41,306 @@ HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C) unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/, MCFixup const &Fixup, bool IsPCRel) const { + // determine the type of the relocation unsigned Type = (unsigned)ELF::R_HEX_NONE; - llvm::MCFixupKind Kind = Fixup.getKind(); + unsigned Kind = (unsigned)Fixup.getKind(); switch (Kind) { - default: - DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n"); - llvm_unreachable("Unimplemented Fixup kind!"); - break; - case FK_Data_4: - Type = (IsPCRel) ? ELF::R_HEX_32_PCREL : ELF::R_HEX_32; - break; + default: + DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n"); + llvm_unreachable("Unimplemented Fixup kind!"); + break; + case FK_Data_4: + Type = (IsPCRel) ? ELF::R_HEX_32_PCREL : ELF::R_HEX_32; + break; + case FK_PCRel_4: + Type = ELF::R_HEX_32_PCREL; + break; + case FK_Data_2: + Type = ELF::R_HEX_16; + break; + case FK_Data_1: + Type = ELF::R_HEX_8; + break; + case fixup_Hexagon_B22_PCREL: + Type = ELF::R_HEX_B22_PCREL; + break; + case fixup_Hexagon_B15_PCREL: + Type = ELF::R_HEX_B15_PCREL; + break; + case fixup_Hexagon_B7_PCREL: + Type = ELF::R_HEX_B7_PCREL; + break; + case fixup_Hexagon_LO16: + Type = ELF::R_HEX_LO16; + break; + case fixup_Hexagon_HI16: + Type = ELF::R_HEX_HI16; + break; + case fixup_Hexagon_32: + Type = ELF::R_HEX_32; + break; + case fixup_Hexagon_16: + Type = ELF::R_HEX_16; + break; + case fixup_Hexagon_8: + Type = ELF::R_HEX_8; + break; + case fixup_Hexagon_GPREL16_0: + Type = ELF::R_HEX_GPREL16_0; + break; + case fixup_Hexagon_GPREL16_1: + Type = ELF::R_HEX_GPREL16_1; + break; + case fixup_Hexagon_GPREL16_2: + Type = ELF::R_HEX_GPREL16_2; + break; + case fixup_Hexagon_GPREL16_3: + Type = ELF::R_HEX_GPREL16_3; + break; + case fixup_Hexagon_HL16: + Type = ELF::R_HEX_HL16; + break; + case fixup_Hexagon_B13_PCREL: + Type = ELF::R_HEX_B13_PCREL; + break; + case fixup_Hexagon_B9_PCREL: + Type = ELF::R_HEX_B9_PCREL; + break; + case fixup_Hexagon_B32_PCREL_X: + Type = ELF::R_HEX_B32_PCREL_X; + break; + case fixup_Hexagon_32_6_X: + Type = ELF::R_HEX_32_6_X; + break; + case fixup_Hexagon_B22_PCREL_X: + Type = ELF::R_HEX_B22_PCREL_X; + break; + case fixup_Hexagon_B15_PCREL_X: + Type = ELF::R_HEX_B15_PCREL_X; + break; + case fixup_Hexagon_B13_PCREL_X: + Type = ELF::R_HEX_B13_PCREL_X; + break; + case fixup_Hexagon_B9_PCREL_X: + Type = ELF::R_HEX_B9_PCREL_X; + break; + case fixup_Hexagon_B7_PCREL_X: + Type = ELF::R_HEX_B7_PCREL_X; + break; + case fixup_Hexagon_16_X: + Type = ELF::R_HEX_16_X; + break; + case fixup_Hexagon_12_X: + Type = ELF::R_HEX_12_X; + break; + case fixup_Hexagon_11_X: + Type = ELF::R_HEX_11_X; + break; + case fixup_Hexagon_10_X: + Type = ELF::R_HEX_10_X; + break; + 
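[Editorial sketch: every case in the relocation switch below is a one-to-one rename from a fixup kind to the ELF relocation of the same name. A table-driven equivalent, shown here as a hypothetical helper that is not part of this patch, makes that regularity explicit.]

  #include <unordered_map>

  // Hypothetical lookup replacing the 1:1 switch; the unsigned values
  // stand in for the fixup_Hexagon_* and ELF::R_HEX_* enumerators.
  static unsigned lookupRelocType(
      const std::unordered_map<unsigned, unsigned> &Table, unsigned Kind,
      unsigned Fallback) {
    auto It = Table.find(Kind);
    return It == Table.end() ? Fallback : It->second;
  }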
case fixup_Hexagon_9_X: + Type = ELF::R_HEX_9_X; + break; + case fixup_Hexagon_8_X: + Type = ELF::R_HEX_8_X; + break; + case fixup_Hexagon_7_X: + Type = ELF::R_HEX_7_X; + break; + case fixup_Hexagon_6_X: + Type = ELF::R_HEX_6_X; + break; + case fixup_Hexagon_32_PCREL: + Type = ELF::R_HEX_32_PCREL; + break; + case fixup_Hexagon_COPY: + Type = ELF::R_HEX_COPY; + break; + case fixup_Hexagon_GLOB_DAT: + Type = ELF::R_HEX_GLOB_DAT; + break; + case fixup_Hexagon_JMP_SLOT: + Type = ELF::R_HEX_JMP_SLOT; + break; + case fixup_Hexagon_RELATIVE: + Type = ELF::R_HEX_RELATIVE; + break; + case fixup_Hexagon_PLT_B22_PCREL: + Type = ELF::R_HEX_PLT_B22_PCREL; + break; + case fixup_Hexagon_GOTREL_LO16: + Type = ELF::R_HEX_GOTREL_LO16; + break; + case fixup_Hexagon_GOTREL_HI16: + Type = ELF::R_HEX_GOTREL_HI16; + break; + case fixup_Hexagon_GOTREL_32: + Type = ELF::R_HEX_GOTREL_32; + break; + case fixup_Hexagon_GOT_LO16: + Type = ELF::R_HEX_GOT_LO16; + break; + case fixup_Hexagon_GOT_HI16: + Type = ELF::R_HEX_GOT_HI16; + break; + case fixup_Hexagon_GOT_32: + Type = ELF::R_HEX_GOT_32; + break; + case fixup_Hexagon_GOT_16: + Type = ELF::R_HEX_GOT_16; + break; + case fixup_Hexagon_DTPMOD_32: + Type = ELF::R_HEX_DTPMOD_32; + break; + case fixup_Hexagon_DTPREL_LO16: + Type = ELF::R_HEX_DTPREL_LO16; + break; + case fixup_Hexagon_DTPREL_HI16: + Type = ELF::R_HEX_DTPREL_HI16; + break; + case fixup_Hexagon_DTPREL_32: + Type = ELF::R_HEX_DTPREL_32; + break; + case fixup_Hexagon_DTPREL_16: + Type = ELF::R_HEX_DTPREL_16; + break; + case fixup_Hexagon_GD_PLT_B22_PCREL: + Type = ELF::R_HEX_GD_PLT_B22_PCREL; + break; + case fixup_Hexagon_LD_PLT_B22_PCREL: + Type = ELF::R_HEX_LD_PLT_B22_PCREL; + break; + case fixup_Hexagon_GD_GOT_LO16: + Type = ELF::R_HEX_GD_GOT_LO16; + break; + case fixup_Hexagon_GD_GOT_HI16: + Type = ELF::R_HEX_GD_GOT_HI16; + break; + case fixup_Hexagon_GD_GOT_32: + Type = ELF::R_HEX_GD_GOT_32; + break; + case fixup_Hexagon_GD_GOT_16: + Type = ELF::R_HEX_GD_GOT_16; + break; + case fixup_Hexagon_LD_GOT_LO16: + Type = ELF::R_HEX_LD_GOT_LO16; + break; + case fixup_Hexagon_LD_GOT_HI16: + Type = ELF::R_HEX_LD_GOT_HI16; + break; + case fixup_Hexagon_LD_GOT_32: + Type = ELF::R_HEX_LD_GOT_32; + break; + case fixup_Hexagon_LD_GOT_16: + Type = ELF::R_HEX_LD_GOT_16; + break; + case fixup_Hexagon_IE_LO16: + Type = ELF::R_HEX_IE_LO16; + break; + case fixup_Hexagon_IE_HI16: + Type = ELF::R_HEX_IE_HI16; + break; + case fixup_Hexagon_IE_32: + Type = ELF::R_HEX_IE_32; + break; + case fixup_Hexagon_IE_GOT_LO16: + Type = ELF::R_HEX_IE_GOT_LO16; + break; + case fixup_Hexagon_IE_GOT_HI16: + Type = ELF::R_HEX_IE_GOT_HI16; + break; + case fixup_Hexagon_IE_GOT_32: + Type = ELF::R_HEX_IE_GOT_32; + break; + case fixup_Hexagon_IE_GOT_16: + Type = ELF::R_HEX_IE_GOT_16; + break; + case fixup_Hexagon_TPREL_LO16: + Type = ELF::R_HEX_TPREL_LO16; + break; + case fixup_Hexagon_TPREL_HI16: + Type = ELF::R_HEX_TPREL_HI16; + break; + case fixup_Hexagon_TPREL_32: + Type = ELF::R_HEX_TPREL_32; + break; + case fixup_Hexagon_TPREL_16: + Type = ELF::R_HEX_TPREL_16; + break; + case fixup_Hexagon_6_PCREL_X: + Type = ELF::R_HEX_6_PCREL_X; + break; + case fixup_Hexagon_GOTREL_32_6_X: + Type = ELF::R_HEX_GOTREL_32_6_X; + break; + case fixup_Hexagon_GOTREL_16_X: + Type = ELF::R_HEX_GOTREL_16_X; + break; + case fixup_Hexagon_GOTREL_11_X: + Type = ELF::R_HEX_GOTREL_11_X; + break; + case fixup_Hexagon_GOT_32_6_X: + Type = ELF::R_HEX_GOT_32_6_X; + break; + case fixup_Hexagon_GOT_16_X: + Type = ELF::R_HEX_GOT_16_X; + break; + case fixup_Hexagon_GOT_11_X: + 
Type = ELF::R_HEX_GOT_11_X; + break; + case fixup_Hexagon_DTPREL_32_6_X: + Type = ELF::R_HEX_DTPREL_32_6_X; + break; + case fixup_Hexagon_DTPREL_16_X: + Type = ELF::R_HEX_DTPREL_16_X; + break; + case fixup_Hexagon_DTPREL_11_X: + Type = ELF::R_HEX_DTPREL_11_X; + break; + case fixup_Hexagon_GD_GOT_32_6_X: + Type = ELF::R_HEX_GD_GOT_32_6_X; + break; + case fixup_Hexagon_GD_GOT_16_X: + Type = ELF::R_HEX_GD_GOT_16_X; + break; + case fixup_Hexagon_GD_GOT_11_X: + Type = ELF::R_HEX_GD_GOT_11_X; + break; + case fixup_Hexagon_LD_GOT_32_6_X: + Type = ELF::R_HEX_LD_GOT_32_6_X; + break; + case fixup_Hexagon_LD_GOT_16_X: + Type = ELF::R_HEX_LD_GOT_16_X; + break; + case fixup_Hexagon_LD_GOT_11_X: + Type = ELF::R_HEX_LD_GOT_11_X; + break; + case fixup_Hexagon_IE_32_6_X: + Type = ELF::R_HEX_IE_32_6_X; + break; + case fixup_Hexagon_IE_16_X: + Type = ELF::R_HEX_IE_16_X; + break; + case fixup_Hexagon_IE_GOT_32_6_X: + Type = ELF::R_HEX_IE_GOT_32_6_X; + break; + case fixup_Hexagon_IE_GOT_16_X: + Type = ELF::R_HEX_IE_GOT_16_X; + break; + case fixup_Hexagon_IE_GOT_11_X: + Type = ELF::R_HEX_IE_GOT_11_X; + break; + case fixup_Hexagon_TPREL_32_6_X: + Type = ELF::R_HEX_TPREL_32_6_X; + break; + case fixup_Hexagon_TPREL_16_X: + Type = ELF::R_HEX_TPREL_16_X; + break; + case fixup_Hexagon_TPREL_11_X: + Type = ELF::R_HEX_TPREL_11_X; + break; } return Type; } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index 15cda717cf1c..36f81465eef6 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -28,7 +28,47 @@ using namespace llvm; #define GET_INSTRUCTION_NAME #include "HexagonGenAsmWriter.inc" -const char HexagonInstPrinter::PacketPadding = '\t'; +HexagonAsmInstPrinter::HexagonAsmInstPrinter(MCInstPrinter *RawPrinter) + : MCInstPrinter(*RawPrinter), RawPrinter(RawPrinter) {} + +void HexagonAsmInstPrinter::printInst(MCInst const *MI, raw_ostream &O, + StringRef Annot, + MCSubtargetInfo const &STI) { + assert(HexagonMCInstrInfo::isBundle(*MI)); + assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE); + std::string Buffer; + { + raw_string_ostream TempStream(Buffer); + RawPrinter->printInst(MI, TempStream, "", STI); + } + StringRef Contents(Buffer); + auto PacketBundle = Contents.rsplit('\n'); + auto HeadTail = PacketBundle.first.split('\n'); + auto Preamble = "\t{\n\t\t"; + auto Separator = ""; + while(!HeadTail.first.empty()) { + O << Separator; + StringRef Inst; + auto Duplex = HeadTail.first.split('\v'); + if(!Duplex.second.empty()){ + O << Duplex.first << "\n"; + Inst = Duplex.second; + } + else + Inst = Duplex.first; + O << Preamble; + O << Inst; + HeadTail = HeadTail.second.split('\n'); + Preamble = ""; + Separator = "\n\t\t"; + } + O << "\n\t}" << PacketBundle.second; +} + +void HexagonAsmInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { + RawPrinter->printRegName(O, RegNo); +} + // Return the minimum value that a constant extendable operand can have // without being extended. static int getMinValue(uint64_t TSFlags) { @@ -77,48 +117,44 @@ void HexagonInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } -void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O, - StringRef Annot, - const MCSubtargetInfo &STI) { - const char startPacket = '{', - endPacket = '}'; - // TODO: add outer HW loop when it's supported too. 
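[Editorial sketch: HexagonAsmInstPrinter::printInst above post-processes the raw printer output, where instructions arrive one per '\n', a duplex pair is joined by '\v', and packet properties sit on a trailing line. A simplified standalone re-flow of that format follows; the real printer's brace and duplex placement differs slightly.]

  #include <sstream>
  #include <string>

  static std::string formatPacket(const std::string &Raw) {
    auto LastNL = Raw.rfind('\n');
    std::string Body =
        LastNL == std::string::npos ? Raw : Raw.substr(0, LastNL);
    std::string Props =
        LastNL == std::string::npos ? "" : Raw.substr(LastNL + 1);
    std::ostringstream OS;
    OS << "\t{\n";
    std::istringstream Lines(Body);
    for (std::string Line; std::getline(Lines, Line);) {
      auto VTab = Line.find('\v'); // duplex separator
      if (VTab != std::string::npos)
        OS << "\t\t" << Line.substr(0, VTab) << "; "
           << Line.substr(VTab + 1) << "\n";
      else
        OS << "\t\t" << Line << "\n";
    }
    OS << "\t}" << Props; // Props carries e.g. " :endloop0"
    return OS.str();
  }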
- if (MI->getOpcode() == Hexagon::ENDLOOP0) { - // Ending a harware loop is different from ending an regular packet. - assert(HexagonMCInstrInfo::isPacketEnd(*MI) && "Loop-end must also end the packet"); - - if (HexagonMCInstrInfo::isPacketBegin(*MI)) { - // There must be a packet to end a loop. - // FIXME: when shuffling is always run, this shouldn't be needed. - MCInst Nop; - StringRef NoAnnot; - - Nop.setOpcode (Hexagon::A2_nop); - HexagonMCInstrInfo::setPacketBegin (Nop, HexagonMCInstrInfo::isPacketBegin(*MI)); - printInst (&Nop, O, NoAnnot, STI); - } +void HexagonInstPrinter::setExtender(MCInst const &MCI) { + HasExtender = HexagonMCInstrInfo::isImmext(MCI); +} - // Close the packet. - if (HexagonMCInstrInfo::isPacketEnd(*MI)) - O << PacketPadding << endPacket; +void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS, + StringRef Annot, + MCSubtargetInfo const &STI) { + assert(HexagonMCInstrInfo::isBundle(*MI)); + assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE); + HasExtender = false; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) { + MCInst const &MCI = *I.getInst(); + if (HexagonMCInstrInfo::isDuplex(MII, MCI)) { + printInstruction(MCI.getOperand(1).getInst(), OS); + OS << '\v'; + HasExtender = false; + printInstruction(MCI.getOperand(0).getInst(), OS); + } else + printInstruction(&MCI, OS); + setExtender(MCI); + OS << "\n"; + } - printInstruction(MI, O); + auto Separator = ""; + if (HexagonMCInstrInfo::isInnerLoop(*MI)) { + OS << Separator; + Separator = " "; + MCInst ME; + ME.setOpcode(Hexagon::ENDLOOP0); + printInstruction(&ME, OS); } - else { - // Prefix the insn opening the packet. - if (HexagonMCInstrInfo::isPacketBegin(*MI)) - O << PacketPadding << startPacket << '\n'; - - printInstruction(MI, O); - - // Suffix the insn closing the packet. - if (HexagonMCInstrInfo::isPacketEnd(*MI)) - // Suffix the packet in a new line always, since the GNU assembler has - // issues with a closing brace on the same line as CONST{32,64}. 
- O << '\n' << PacketPadding << endPacket; + if (HexagonMCInstrInfo::isOuterLoop(*MI)) { + OS << Separator; + Separator = " "; + MCInst ME; + ME.setOpcode(Hexagon::ENDLOOP1); + printInstruction(&ME, OS); } - - printAnnotation(O, Annot); } void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, @@ -128,7 +164,7 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (MO.isReg()) { printRegName(O, MO.getReg()); } else if(MO.isExpr()) { - O << *MO.getExpr(); + MO.getExpr()->print(O, &MAI); } else if(MO.isImm()) { printImmOperand(MI, OpNo, O); } else { @@ -141,7 +177,7 @@ void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo, const MCOperand& MO = MI->getOperand(OpNo); if(MO.isExpr()) { - O << *MO.getExpr(); + MO.getExpr()->print(O, &MAI); } else if(MO.isImm()) { O << MI->getOperand(OpNo).getImm(); } else { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h index 3fedaed8fbf9..534ac237d635 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -18,6 +18,21 @@ #include "llvm/MC/MCInstrInfo.h" namespace llvm { +class HexagonAsmInstPrinter : public MCInstPrinter { +public: + HexagonAsmInstPrinter(MCInstPrinter *RawPrinter); + void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, + MCSubtargetInfo const &STI) override; + void printRegName(raw_ostream &O, unsigned RegNo) const override; + std::unique_ptr<MCInstPrinter> RawPrinter; +}; +/// Prints bundles as a newline separated list of individual instructions +/// Duplexes are separated by a vertical tab \v character +/// A trailing line includes bundle properties such as endloop0/1 +/// +/// r0 = add(r1, r2) +/// r0 = #0 \v jump 0x0 +/// :endloop0 :endloop1 class HexagonInstPrinter : public MCInstPrinter { public: explicit HexagonInstPrinter(MCAsmInfo const &MAI, @@ -74,11 +89,11 @@ namespace llvm { void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi) const; - static const char PacketPadding; - private: const MCInstrInfo &MII; + bool HasExtender; + void setExtender(MCInst const &MCI); }; } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp index ad5e0fb15e7f..51d2f1c878dc 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -18,7 +18,7 @@ using namespace llvm; // Pin the vtable to this file. void HexagonMCAsmInfo::anchor() {} -HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) { +HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) { Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; Data64bitsDirective = nullptr; // .xword is only supported by V9. 
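[Editorial sketch: per the header comment in HexagonInstPrinter.h above, a bundle prints as a brace-enclosed packet whose trailing line carries packet-wide properties. A minimal illustration of how the loop-end markers accumulate, mirroring the isInnerLoop/isOuterLoop branches in the new printInst:]

  #include <string>

  static std::string loopMarkers(bool InnerLoop, bool OuterLoop) {
    std::string Out;
    if (InnerLoop)
      Out += " :endloop0"; // ENDLOOP0 pseudo printed after the packet
    if (OuterLoop)
      Out += " :endloop1"; // ENDLOOP1 for the outer hardware loop
    return Out;
  }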
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h index ab18f0b37ba6..dc0706994786 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h @@ -18,10 +18,12 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { + class Triple; + class HexagonMCAsmInfo : public MCAsmInfoELF { void anchor() override; public: - explicit HexagonMCAsmInfo(StringRef TT); + explicit HexagonMCAsmInfo(const Triple &TT); }; } // namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index ae3953abba10..1eee852996fd 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "mccodeemitter" @@ -31,38 +32,206 @@ using namespace Hexagon; STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); -namespace { -/// \brief 10.6 Instruction Packets -/// Possible values for instruction packet parse field. -enum class ParseField { duplex = 0x0, last0 = 0x1, last1 = 0x2, end = 0x3 }; -/// \brief Returns the packet bits based on instruction position. -uint32_t getPacketBits(MCInst const &HMI) { - unsigned const ParseFieldOffset = 14; - ParseField Field = HexagonMCInstrInfo::isPacketEnd(HMI) ? ParseField::end - : ParseField::last0; - return static_cast<uint32_t>(Field) << ParseFieldOffset; -} -void emitLittleEndian(uint64_t Binary, raw_ostream &OS) { - OS << static_cast<uint8_t>((Binary >> 0x00) & 0xff); - OS << static_cast<uint8_t>((Binary >> 0x08) & 0xff); - OS << static_cast<uint8_t>((Binary >> 0x10) & 0xff); - OS << static_cast<uint8_t>((Binary >> 0x18) & 0xff); -} -} - HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT) : MCT(aMCT), MCII(aMII), Addend(new unsigned(0)), - Extended(new bool(false)) {} + Extended(new bool(false)), CurrentBundle(new MCInst const *) {} + +uint32_t HexagonMCCodeEmitter::parseBits(size_t Instruction, size_t Last, + MCInst const &MCB, + MCInst const &MCI) const { + bool Duplex = HexagonMCInstrInfo::isDuplex(MCII, MCI); + if (Instruction == 0) { + if (HexagonMCInstrInfo::isInnerLoop(MCB)) { + assert(!Duplex); + assert(Instruction != Last); + return HexagonII::INST_PARSE_LOOP_END; + } + } + if (Instruction == 1) { + if (HexagonMCInstrInfo::isOuterLoop(MCB)) { + assert(!Duplex); + assert(Instruction != Last); + return HexagonII::INST_PARSE_LOOP_END; + } + } + if (Duplex) { + assert(Instruction == Last); + return HexagonII::INST_PARSE_DUPLEX; + } + if(Instruction == Last) + return HexagonII::INST_PARSE_PACKET_END; + return HexagonII::INST_PARSE_NOT_END; +} void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, MCSubtargetInfo const &STI) const { - uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI) | getPacketBits(MI); - assert(HexagonMCInstrInfo::getDesc(MCII, MI).getSize() == 4 && - "All instructions should be 32bit"); - (void)&MCII; - emitLittleEndian(Binary, OS); + MCInst &HMB = const_cast<MCInst &>(MI); + + assert(HexagonMCInstrInfo::isBundle(HMB)); + DEBUG(dbgs() << "Encoding bundle\n";); + *Addend = 0; + *Extended = false; + *CurrentBundle = &MI; + size_t Instruction = 0; + size_t Last = 
HexagonMCInstrInfo::bundleSize(HMB) - 1; + for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) { + MCInst &HMI = const_cast<MCInst &>(*I.getInst()); + EncodeSingleInstruction(HMI, OS, Fixups, STI, + parseBits(Instruction, Last, HMB, HMI), + Instruction); + *Extended = HexagonMCInstrInfo::isImmext(HMI); + *Addend += HEXAGON_INSTR_SIZE; + ++Instruction; + } + return; +} + +/// EncodeSingleInstruction - Emit a single +void HexagonMCCodeEmitter::EncodeSingleInstruction( + const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI, uint32_t Parse, size_t Index) const { + MCInst HMB = MI; + assert(!HexagonMCInstrInfo::isBundle(HMB)); + uint64_t Binary; + + // Pseudo instructions don't get encoded and shouldn't be here + // in the first place! + assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() && + "pseudo-instruction found"); + DEBUG(dbgs() << "Encoding insn" + " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'" + "\n"); + + if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) { + // Calculate the new value distance to the associated producer + MCOperand &MCO = + HMB.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, HMB)); + unsigned SOffset = 0; + unsigned Register = MCO.getReg(); + unsigned Register1; + auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); + auto i = Instructions.begin() + Index - 1; + for (;; --i) { + assert(i != Instructions.begin() - 1 && "Couldn't find producer"); + MCInst const &Inst = *i->getInst(); + if (HexagonMCInstrInfo::isImmext(Inst)) + continue; + ++SOffset; + Register1 = + HexagonMCInstrInfo::hasNewValue(MCII, Inst) + ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg() + : static_cast<unsigned>(Hexagon::NoRegister); + if (Register != Register1) + // This isn't the register we're looking for + continue; + if (!HexagonMCInstrInfo::isPredicated(MCII, Inst)) + // Producer is unpredicated + break; + assert(HexagonMCInstrInfo::isPredicated(MCII, HMB) && + "Unpredicated consumer depending on predicated producer"); + if (HexagonMCInstrInfo::isPredicatedTrue(MCII, Inst) == + HexagonMCInstrInfo::isPredicatedTrue(MCII, HMB)) + // Producer predicate sense matched ours + break; + } + // Hexagon PRM 10.11 Construct Nt from distance + unsigned Offset = SOffset; + Offset <<= 1; + MCO.setReg(Offset + Hexagon::R0); + } + + Binary = getBinaryCodeForInstr(HMB, Fixups, STI); + // Check for unimplemented instructions. Immediate extenders + // are encoded as zero, so they need to be accounted for. + if ((!Binary) && + ((HMB.getOpcode() != DuplexIClass0) && (HMB.getOpcode() != A4_ext) && + (HMB.getOpcode() != A4_ext_b) && (HMB.getOpcode() != A4_ext_c) && + (HMB.getOpcode() != A4_ext_g))) { + // Use a A2_nop for unimplemented instructions. 
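[Editorial sketch: the producer search above implements Hexagon PRM 10.11. A new-value consumer names its operand by the backward distance to the producing instruction in the same packet, skipping immediate extenders, and the distance lands in the upper bits of the Nt field. Standalone restatement; R0Base stands in for Hexagon::R0, and the 1..3 bound is an assumption drawn from the packet size limit.]

  #include <cassert>

  static unsigned encodeNtField(unsigned ProducerDistance, unsigned R0Base) {
    assert(ProducerDistance >= 1 && ProducerDistance <= 3 &&
           "producer must precede the consumer in the same packet");
    // The distance occupies the field's upper bits; the low bit stays
    // 0, hence the shift by one (Offset <<= 1 in the code above).
    return R0Base + (ProducerDistance << 1);
  }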
+ DEBUG(dbgs() << "Unimplemented inst: " + " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'" + "\n"); + llvm_unreachable("Unimplemented Instruction"); + } + Binary |= Parse; + + // if we need to emit a duplexed instruction + if (HMB.getOpcode() >= Hexagon::DuplexIClass0 && + HMB.getOpcode() <= Hexagon::DuplexIClassF) { + assert(Parse == HexagonII::INST_PARSE_DUPLEX && + "Emitting duplex without duplex parse bits"); + unsigned dupIClass; + switch (HMB.getOpcode()) { + case Hexagon::DuplexIClass0: + dupIClass = 0; + break; + case Hexagon::DuplexIClass1: + dupIClass = 1; + break; + case Hexagon::DuplexIClass2: + dupIClass = 2; + break; + case Hexagon::DuplexIClass3: + dupIClass = 3; + break; + case Hexagon::DuplexIClass4: + dupIClass = 4; + break; + case Hexagon::DuplexIClass5: + dupIClass = 5; + break; + case Hexagon::DuplexIClass6: + dupIClass = 6; + break; + case Hexagon::DuplexIClass7: + dupIClass = 7; + break; + case Hexagon::DuplexIClass8: + dupIClass = 8; + break; + case Hexagon::DuplexIClass9: + dupIClass = 9; + break; + case Hexagon::DuplexIClassA: + dupIClass = 10; + break; + case Hexagon::DuplexIClassB: + dupIClass = 11; + break; + case Hexagon::DuplexIClassC: + dupIClass = 12; + break; + case Hexagon::DuplexIClassD: + dupIClass = 13; + break; + case Hexagon::DuplexIClassE: + dupIClass = 14; + break; + case Hexagon::DuplexIClassF: + dupIClass = 15; + break; + default: + llvm_unreachable("Unimplemented DuplexIClass"); + break; + } + // 29 is the bit position. + // 0b1110 =0xE bits are masked off and down shifted by 1 bit. + // Last bit is moved to bit position 13 + Binary = ((dupIClass & 0xE) << (29 - 1)) | ((dupIClass & 0x1) << 13); + + const MCInst *subInst0 = HMB.getOperand(0).getInst(); + const MCInst *subInst1 = HMB.getOperand(1).getInst(); + + // get subinstruction slot 0 + unsigned subInstSlot0Bits = getBinaryCodeForInstr(*subInst0, Fixups, STI); + // get subinstruction slot 1 + unsigned subInstSlot1Bits = getBinaryCodeForInstr(*subInst1, Fixups, STI); + + Binary |= subInstSlot0Bits | (subInstSlot1Bits << 16); + } + support::endian::Writer<support::little>(OS).write<uint32_t>(Binary); ++MCNumEmitted; } @@ -182,7 +351,7 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, { int64_t Res; - if (ME->EvaluateAsAbsolute(Res)) + if (ME->evaluateAsAbsolute(Res)) return Res; MCExpr::ExprKind MK = ME->getKind(); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h index 939380af1013..9aa258cee4c6 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -30,6 +30,7 @@ class HexagonMCCodeEmitter : public MCCodeEmitter { MCInstrInfo const &MCII; std::unique_ptr<unsigned> Addend; std::unique_ptr<bool> Extended; + std::unique_ptr<MCInst const *> CurrentBundle; // helper routine for getMachineOpValue() unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO, @@ -39,12 +40,21 @@ class HexagonMCCodeEmitter : public MCCodeEmitter { public: HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT); + // Return parse bits for instruction `MCI' inside bundle `MCB' + uint32_t parseBits(size_t Instruction, size_t Last, MCInst const &MCB, + MCInst const &MCI) const; + MCSubtargetInfo const &getSubtargetInfo() const; void encodeInstruction(MCInst const &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, MCSubtargetInfo const &STI) const override; + void EncodeSingleInstruction(const MCInst &MI, raw_ostream &OS, + 
SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI, + uint32_t Parse, size_t Index) const; + // \brief TableGen'erated function for getting the // binary encoding for an instruction. uint64_t getBinaryCodeForInstr(MCInst const &MI, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp new file mode 100644 index 000000000000..108093547f82 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp @@ -0,0 +1,420 @@ + +//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is looks at a packet and tries to form compound insns +// +//===----------------------------------------------------------------------===// +#include "Hexagon.h" +#include "MCTargetDesc/HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace Hexagon; + +#define DEBUG_TYPE "hexagon-mccompound" + +enum OpcodeIndex { + fp0_jump_nt = 0, + fp0_jump_t, + fp1_jump_nt, + fp1_jump_t, + tp0_jump_nt, + tp0_jump_t, + tp1_jump_nt, + tp1_jump_t +}; + +unsigned tstBitOpcode[8] = {J4_tstbit0_fp0_jump_nt, J4_tstbit0_fp0_jump_t, + J4_tstbit0_fp1_jump_nt, J4_tstbit0_fp1_jump_t, + J4_tstbit0_tp0_jump_nt, J4_tstbit0_tp0_jump_t, + J4_tstbit0_tp1_jump_nt, J4_tstbit0_tp1_jump_t}; +unsigned cmpeqBitOpcode[8] = {J4_cmpeq_fp0_jump_nt, J4_cmpeq_fp0_jump_t, + J4_cmpeq_fp1_jump_nt, J4_cmpeq_fp1_jump_t, + J4_cmpeq_tp0_jump_nt, J4_cmpeq_tp0_jump_t, + J4_cmpeq_tp1_jump_nt, J4_cmpeq_tp1_jump_t}; +unsigned cmpgtBitOpcode[8] = {J4_cmpgt_fp0_jump_nt, J4_cmpgt_fp0_jump_t, + J4_cmpgt_fp1_jump_nt, J4_cmpgt_fp1_jump_t, + J4_cmpgt_tp0_jump_nt, J4_cmpgt_tp0_jump_t, + J4_cmpgt_tp1_jump_nt, J4_cmpgt_tp1_jump_t}; +unsigned cmpgtuBitOpcode[8] = {J4_cmpgtu_fp0_jump_nt, J4_cmpgtu_fp0_jump_t, + J4_cmpgtu_fp1_jump_nt, J4_cmpgtu_fp1_jump_t, + J4_cmpgtu_tp0_jump_nt, J4_cmpgtu_tp0_jump_t, + J4_cmpgtu_tp1_jump_nt, J4_cmpgtu_tp1_jump_t}; +unsigned cmpeqiBitOpcode[8] = {J4_cmpeqi_fp0_jump_nt, J4_cmpeqi_fp0_jump_t, + J4_cmpeqi_fp1_jump_nt, J4_cmpeqi_fp1_jump_t, + J4_cmpeqi_tp0_jump_nt, J4_cmpeqi_tp0_jump_t, + J4_cmpeqi_tp1_jump_nt, J4_cmpeqi_tp1_jump_t}; +unsigned cmpgtiBitOpcode[8] = {J4_cmpgti_fp0_jump_nt, J4_cmpgti_fp0_jump_t, + J4_cmpgti_fp1_jump_nt, J4_cmpgti_fp1_jump_t, + J4_cmpgti_tp0_jump_nt, J4_cmpgti_tp0_jump_t, + J4_cmpgti_tp1_jump_nt, J4_cmpgti_tp1_jump_t}; +unsigned cmpgtuiBitOpcode[8] = {J4_cmpgtui_fp0_jump_nt, J4_cmpgtui_fp0_jump_t, + J4_cmpgtui_fp1_jump_nt, J4_cmpgtui_fp1_jump_t, + J4_cmpgtui_tp0_jump_nt, J4_cmpgtui_tp0_jump_t, + J4_cmpgtui_tp1_jump_nt, J4_cmpgtui_tp1_jump_t}; +unsigned cmpeqn1BitOpcode[8] = {J4_cmpeqn1_fp0_jump_nt, J4_cmpeqn1_fp0_jump_t, + J4_cmpeqn1_fp1_jump_nt, J4_cmpeqn1_fp1_jump_t, + J4_cmpeqn1_tp0_jump_nt, J4_cmpeqn1_tp0_jump_t, + J4_cmpeqn1_tp1_jump_nt, J4_cmpeqn1_tp1_jump_t}; +unsigned cmpgtn1BitOpcode[8] = { + J4_cmpgtn1_fp0_jump_nt, J4_cmpgtn1_fp0_jump_t, J4_cmpgtn1_fp1_jump_nt, + J4_cmpgtn1_fp1_jump_t, J4_cmpgtn1_tp0_jump_nt, 
J4_cmpgtn1_tp0_jump_t, + J4_cmpgtn1_tp1_jump_nt, J4_cmpgtn1_tp1_jump_t, +}; + +// enum HexagonII::CompoundGroup +namespace { +unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { + unsigned DstReg, SrcReg, Src1Reg, Src2Reg; + + switch (MI.getOpcode()) { + default: + return HexagonII::HCG_None; + // + // Compound pairs. + // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2" + // "Rd16=#U6 ; jump #r9:2" + // "Rd16=Rs16 ; jump #r9:2" + // + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtu: + if (IsExtended) + return false; + DstReg = MI.getOperand(0).getReg(); + Src1Reg = MI.getOperand(1).getReg(); + Src2Reg = MI.getOperand(2).getReg(); + if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg)) + return HexagonII::HCG_A; + break; + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + if (IsExtended) + return false; + // P0 = cmp.eq(Rs,#u2) + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + MI.getOperand(2).isImm() && ((isUInt<5>(MI.getOperand(2).getImm())) || + (MI.getOperand(2).getImm() == -1))) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfr: + if (IsExtended) + return false; + // Rd = Rs + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) + return HexagonII::HCG_A; + break; + case Hexagon::A2_tfrsi: + if (IsExtended) + return false; + // Rd = #u6 + DstReg = MI.getOperand(0).getReg(); + if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() <= 63 && + MI.getOperand(1).getImm() >= 0 && + HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) + return HexagonII::HCG_A; + break; + case Hexagon::S2_tstbit_i: + if (IsExtended) + return false; + DstReg = MI.getOperand(0).getReg(); + Src1Reg = MI.getOperand(1).getReg(); + if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + MI.getOperand(2).isImm() && + HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + (MI.getOperand(2).getImm() == 0)) + return HexagonII::HCG_A; + break; + // The fact that .new form is used pretty much guarantees + // that predicate register will match. Nevertheless, + // there could be some false positives without additional + // checking. + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumptnewpt: + case Hexagon::J2_jumpfnewpt: + Src1Reg = MI.getOperand(0).getReg(); + if (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg) + return HexagonII::HCG_B; + break; + // Transfer and jump: + // Rd=#U6 ; jump #r9:2 + // Rd=Rs ; jump #r9:2 + // Do not test for jump range here. + case Hexagon::J2_jump: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + return HexagonII::HCG_C; + break; + } + + return HexagonII::HCG_None; +} +} + +/// getCompoundOp - Return the index from 0-7 into the above opcode lists. +namespace { +unsigned getCompoundOp(MCInst const &HMCI) { + const MCOperand &Predicate = HMCI.getOperand(0); + unsigned PredReg = Predicate.getReg(); + + assert((PredReg == Hexagon::P0) || (PredReg == Hexagon::P1) || + (PredReg == Hexagon::P2) || (PredReg == Hexagon::P3)); + + switch (HMCI.getOpcode()) { + default: + llvm_unreachable("Expected match not found.\n"); + break; + case Hexagon::J2_jumpfnew: + return (PredReg == Hexagon::P0) ? 
fp0_jump_nt : fp1_jump_nt; + case Hexagon::J2_jumpfnewpt: + return (PredReg == Hexagon::P0) ? fp0_jump_t : fp1_jump_t; + case Hexagon::J2_jumptnew: + return (PredReg == Hexagon::P0) ? tp0_jump_nt : tp1_jump_nt; + case Hexagon::J2_jumptnewpt: + return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t; + } +} +} + +namespace { +MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { + MCInst *CompoundInsn = 0; + unsigned compoundOpcode; + MCOperand Rs, Rt; + + switch (L.getOpcode()) { + default: + DEBUG(dbgs() << "Possible compound ignored\n"); + return CompoundInsn; + + case Hexagon::A2_tfrsi: + Rt = L.getOperand(0); + compoundOpcode = J4_jumpseti; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(L.getOperand(1)); // Immediate + CompoundInsn->addOperand(R.getOperand(0)); // Jump target + break; + + case Hexagon::A2_tfr: + Rt = L.getOperand(0); + Rs = L.getOperand(1); + + compoundOpcode = J4_jumpsetr; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(R.getOperand(0)); // Jump target. + + break; + + case Hexagon::C2_cmpeq: + DEBUG(dbgs() << "CX: C2_cmpeq\n"); + Rs = L.getOperand(1); + Rt = L.getOperand(2); + + compoundOpcode = cmpeqBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgt: + DEBUG(dbgs() << "CX: C2_cmpgt\n"); + Rs = L.getOperand(1); + Rt = L.getOperand(2); + + compoundOpcode = cmpgtBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgtu: + DEBUG(dbgs() << "CX: C2_cmpgtu\n"); + Rs = L.getOperand(1); + Rt = L.getOperand(2); + + compoundOpcode = cmpgtuBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(Rt); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpeqi: + DEBUG(dbgs() << "CX: C2_cmpeqi\n"); + if (L.getOperand(2).getImm() == -1) + compoundOpcode = cmpeqn1BitOpcode[getCompoundOp(R)]; + else + compoundOpcode = cmpeqiBitOpcode[getCompoundOp(R)]; + + Rs = L.getOperand(1); + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + if (L.getOperand(2).getImm() != -1) + CompoundInsn->addOperand(L.getOperand(2)); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgti: + DEBUG(dbgs() << "CX: C2_cmpgti\n"); + if (L.getOperand(2).getImm() == -1) + compoundOpcode = cmpgtn1BitOpcode[getCompoundOp(R)]; + else + compoundOpcode = cmpgtiBitOpcode[getCompoundOp(R)]; + + Rs = L.getOperand(1); + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + if (L.getOperand(2).getImm() != -1) + CompoundInsn->addOperand(L.getOperand(2)); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::C2_cmpgtui: + DEBUG(dbgs() << "CX: C2_cmpgtui\n"); + Rs = L.getOperand(1); + compoundOpcode = cmpgtuiBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + 
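[Editorial sketch: getCompoundOp above reduces a predicated new-value jump to a 3-bit index into the eight-entry opcode tables. Predicate sense, predicate register, and taken hint each contribute one bit, matching the OpcodeIndex order fp0_jump_nt through tp1_jump_t.]

  // Index = sense*4 + register*2 + hint, as in the tables above.
  static unsigned jumpKey(bool SenseTrue, bool IsP1, bool TakenHint) {
    return (SenseTrue ? 4u : 0u) | (IsP1 ? 2u : 0u) | (TakenHint ? 1u : 0u);
  }
  // e.g. J2_jumptnewpt on p1 -> jumpKey(true, true, true) == 7
  // == tp1_jump_t.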
CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(L.getOperand(2)); + CompoundInsn->addOperand(R.getOperand(1)); + break; + + case Hexagon::S2_tstbit_i: + DEBUG(dbgs() << "CX: S2_tstbit_i\n"); + Rs = L.getOperand(1); + compoundOpcode = tstBitOpcode[getCompoundOp(R)]; + CompoundInsn = new (Context) MCInst; + CompoundInsn->setOpcode(compoundOpcode); + CompoundInsn->addOperand(Rs); + CompoundInsn->addOperand(R.getOperand(1)); + break; + } + + return CompoundInsn; +} +} + +/// Non-Symmetrical. See if these two instructions are fit for compound pair. +namespace { +bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, + MCInst const &MIb, bool IsExtendedB) { + unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA); + unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB); + // We have two candidates - check that this is the same register + // we are talking about. + unsigned Opca = MIa.getOpcode(); + if (MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_C && + (Opca == Hexagon::A2_tfr || Opca == Hexagon::A2_tfrsi)) + return true; + return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) && + (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg())); +} +} + +namespace { +bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { + assert(HexagonMCInstrInfo::isBundle(MCI)); + bool JExtended = false; + for (MCInst::iterator J = + MCI.begin() + HexagonMCInstrInfo::bundleInstructionsOffset; + J != MCI.end(); ++J) { + MCInst const *JumpInst = J->getInst(); + if (HexagonMCInstrInfo::isImmext(*JumpInst)) { + JExtended = true; + continue; + } + if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) == + HexagonII::TypeJ) { + // Try to pair with another insn (B)undled with jump. + bool BExtended = false; + for (MCInst::iterator B = + MCI.begin() + HexagonMCInstrInfo::bundleInstructionsOffset; + B != MCI.end(); ++B) { + MCInst const *Inst = B->getInst(); + if (JumpInst == Inst) + continue; + if (HexagonMCInstrInfo::isImmext(*Inst)) { + BExtended = true; + continue; + } + DEBUG(dbgs() << "J,B: " << JumpInst->getOpcode() << "," + << Inst->getOpcode() << "\n"); + if (isOrderedCompoundPair(*Inst, BExtended, *JumpInst, JExtended)) { + MCInst *CompoundInsn = getCompoundInsn(Context, *Inst, *JumpInst); + if (CompoundInsn) { + DEBUG(dbgs() << "B: " << Inst->getOpcode() << "," + << JumpInst->getOpcode() << " Compounds to " + << CompoundInsn->getOpcode() << "\n"); + J->setInst(CompoundInsn); + MCI.erase(B); + return true; + } + } + BExtended = false; + } + } + JExtended = false; + } + return false; +} +} + +/// tryCompound - Given a bundle check for compound insns when one +/// is found update the contents fo the bundle with the compound insn. +/// If a compound instruction is found then the bundle will have one +/// additional slot. +void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, + MCContext &Context, MCInst &MCI) { + assert(MCI.getOpcode() == Hexagon::BUNDLE && + "Non-Bundle where Bundle expected"); + + // By definition a compound must have 2 insn. + if (MCI.size() < 2) + return; + + // Look for compounds until none are found, only update the bundle when + // a compound is found. 
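[Editorial sketch: tryCompound's loop below re-runs lookForCompound until it reports no change. Each successful fusion erases an operand from the bundle, which invalidates the iterators, so the scan restarts from the top rather than resuming. Generic shape of that fixpoint, illustrative only:]

  #include <vector>

  template <typename T, typename TryFuseFn>
  static void fuseToFixpoint(std::vector<T> &Bundle, TryFuseFn TryFuse) {
    // TryFuse mutates Bundle and returns true when it fused a pair.
    while (TryFuse(Bundle))
      ; // restart the scan after every mutation
  }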
+ while (lookForCompound(MCII, Context, MCI)) + ; + + return; +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp new file mode 100644 index 000000000000..eb629774a2cd --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -0,0 +1,1100 @@ +//===----- HexagonMCDuplexInfo.cpp - Instruction bundle checking ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements duplexing of instructions to reduce code size +// +//===----------------------------------------------------------------------===// + +#include "HexagonBaseInfo.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <map> + +using namespace llvm; +using namespace Hexagon; + +#define DEBUG_TYPE "hexagon-mcduplex-info" + +// pair table of subInstructions with opcodes +static std::pair<unsigned, unsigned> opcodeData[] = { + std::make_pair((unsigned)V4_SA1_addi, 0), + std::make_pair((unsigned)V4_SA1_addrx, 6144), + std::make_pair((unsigned)V4_SA1_addsp, 3072), + std::make_pair((unsigned)V4_SA1_and1, 4608), + std::make_pair((unsigned)V4_SA1_clrf, 6768), + std::make_pair((unsigned)V4_SA1_clrfnew, 6736), + std::make_pair((unsigned)V4_SA1_clrt, 6752), + std::make_pair((unsigned)V4_SA1_clrtnew, 6720), + std::make_pair((unsigned)V4_SA1_cmpeqi, 6400), + std::make_pair((unsigned)V4_SA1_combine0i, 7168), + std::make_pair((unsigned)V4_SA1_combine1i, 7176), + std::make_pair((unsigned)V4_SA1_combine2i, 7184), + std::make_pair((unsigned)V4_SA1_combine3i, 7192), + std::make_pair((unsigned)V4_SA1_combinerz, 7432), + std::make_pair((unsigned)V4_SA1_combinezr, 7424), + std::make_pair((unsigned)V4_SA1_dec, 4864), + std::make_pair((unsigned)V4_SA1_inc, 4352), + std::make_pair((unsigned)V4_SA1_seti, 2048), + std::make_pair((unsigned)V4_SA1_setin1, 6656), + std::make_pair((unsigned)V4_SA1_sxtb, 5376), + std::make_pair((unsigned)V4_SA1_sxth, 5120), + std::make_pair((unsigned)V4_SA1_tfr, 4096), + std::make_pair((unsigned)V4_SA1_zxtb, 5888), + std::make_pair((unsigned)V4_SA1_zxth, 5632), + std::make_pair((unsigned)V4_SL1_loadri_io, 0), + std::make_pair((unsigned)V4_SL1_loadrub_io, 4096), + std::make_pair((unsigned)V4_SL2_deallocframe, 7936), + std::make_pair((unsigned)V4_SL2_jumpr31, 8128), + std::make_pair((unsigned)V4_SL2_jumpr31_f, 8133), + std::make_pair((unsigned)V4_SL2_jumpr31_fnew, 8135), + std::make_pair((unsigned)V4_SL2_jumpr31_t, 8132), + std::make_pair((unsigned)V4_SL2_jumpr31_tnew, 8134), + std::make_pair((unsigned)V4_SL2_loadrb_io, 4096), + std::make_pair((unsigned)V4_SL2_loadrd_sp, 7680), + std::make_pair((unsigned)V4_SL2_loadrh_io, 0), + std::make_pair((unsigned)V4_SL2_loadri_sp, 7168), + std::make_pair((unsigned)V4_SL2_loadruh_io, 2048), + std::make_pair((unsigned)V4_SL2_return, 8000), + std::make_pair((unsigned)V4_SL2_return_f, 8005), + std::make_pair((unsigned)V4_SL2_return_fnew, 8007), + std::make_pair((unsigned)V4_SL2_return_t, 8004), + std::make_pair((unsigned)V4_SL2_return_tnew, 8006), + std::make_pair((unsigned)V4_SS1_storeb_io, 4096), + std::make_pair((unsigned)V4_SS1_storew_io, 0), + std::make_pair((unsigned)V4_SS2_allocframe, 7168), + std::make_pair((unsigned)V4_SS2_storebi0, 4608), + 
std::make_pair((unsigned)V4_SS2_storebi1, 4864), + std::make_pair((unsigned)V4_SS2_stored_sp, 2560), + std::make_pair((unsigned)V4_SS2_storeh_io, 0), + std::make_pair((unsigned)V4_SS2_storew_sp, 2048), + std::make_pair((unsigned)V4_SS2_storewi0, 4096), + std::make_pair((unsigned)V4_SS2_storewi1, 4352)}; + +static std::map<unsigned, unsigned> + subinstOpcodeMap(opcodeData, + opcodeData + sizeof(opcodeData) / sizeof(opcodeData[0])); + +bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) { + switch (Ga) { + case HexagonII::HSIG_None: + default: + return false; + case HexagonII::HSIG_L1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_L2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S1: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_S2: + return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 || + Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 || + Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_A: + return (Gb == HexagonII::HSIG_A); + case HexagonII::HSIG_Compound: + return (Gb == HexagonII::HSIG_Compound); + } + return false; +} + +unsigned HexagonMCInstrInfo::iClassOfDuplexPair(unsigned Ga, unsigned Gb) { + switch (Ga) { + case HexagonII::HSIG_None: + default: + break; + case HexagonII::HSIG_L1: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0; + case HexagonII::HSIG_A: + return 0x4; + } + case HexagonII::HSIG_L2: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0x1; + case HexagonII::HSIG_L2: + return 0x2; + case HexagonII::HSIG_A: + return 0x5; + } + case HexagonII::HSIG_S1: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0x8; + case HexagonII::HSIG_L2: + return 0x9; + case HexagonII::HSIG_S1: + return 0xA; + case HexagonII::HSIG_A: + return 0x6; + } + case HexagonII::HSIG_S2: + switch (Gb) { + default: + break; + case HexagonII::HSIG_L1: + return 0xC; + case HexagonII::HSIG_L2: + return 0xD; + case HexagonII::HSIG_S1: + return 0xB; + case HexagonII::HSIG_S2: + return 0xE; + case HexagonII::HSIG_A: + return 0x7; + } + case HexagonII::HSIG_A: + switch (Gb) { + default: + break; + case HexagonII::HSIG_A: + return 0x3; + } + case HexagonII::HSIG_Compound: + switch (Gb) { + case HexagonII::HSIG_Compound: + return 0xFFFFFFFF; + } + } + return 0xFFFFFFFF; +} + +unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { + unsigned DstReg, PredReg, SrcReg, Src1Reg, Src2Reg; + + switch (MCI.getOpcode()) { + default: + return HexagonII::HSIG_None; + // + // Group L1: + // + // Rd = memw(Rs+#u4:2) + // Rd = memub(Rs+#u4:0) + case Hexagon::L2_loadri_io: + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + // Special case this one from Group L2. 
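[Editorial sketch: isDuplexPairMatch above is an ordered compatibility matrix over the HSIG_* groups. A store-capable position also accepts loads and arithmetic, but not vice versa. Restated as standalone code with shortened enumerator names, mirroring the switch case for case:]

  enum Group { None, L1, L2, S1, S2, A, Compound };

  // The pair is ordered; (Ga, Gb) and (Gb, Ga) are distinct queries.
  static bool duplexPairMatch(Group Ga, Group Gb) {
    switch (Ga) {
    case L1:       return Gb == L1 || Gb == A;
    case L2:       return Gb == L1 || Gb == L2 || Gb == A;
    case S1:       return Gb == L1 || Gb == L2 || Gb == S1 || Gb == A;
    case S2:       return Gb == L1 || Gb == L2 || Gb == S1 ||
                          Gb == S2 || Gb == A;
    case A:        return Gb == A;
    case Compound: return Gb == Compound;
    default:       return false;
    }
  }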
+ // Rd = memw(r29+#u5:2) + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && + MCI.getOperand(2).isImm() && + isShiftedUInt<5, 2>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_L2; + } + // Rd = memw(Rs+#u4:2) + if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + (MCI.getOperand(2).isImm() && + isShiftedUInt<4, 2>(MCI.getOperand(2).getImm()))) { + return HexagonII::HSIG_L1; + } + } + break; + case Hexagon::L2_loadrub_io: + // Rd = memub(Rs+#u4:0) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + MCI.getOperand(2).isImm() && isUInt<4>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_L1; + } + break; + // + // Group L2: + // + // Rd = memh/memuh(Rs+#u3:1) + // Rd = memb(Rs+#u3:0) + // Rd = memw(r29+#u5:2) - Handled above. + // Rdd = memd(r29+#u5:3) + // deallocframe + // [if ([!]p0[.new])] dealloc_return + // [if ([!]p0[.new])] jumpr r31 + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + // Rd = memh/memuh(Rs+#u3:1) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + MCI.getOperand(2).isImm() && + isShiftedUInt<3, 1>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_L2; + } + break; + case Hexagon::L2_loadrb_io: + // Rd = memb(Rs+#u3:0) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + MCI.getOperand(2).isImm() && isUInt<3>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_L2; + } + break; + case Hexagon::L2_loadrd_io: + // Rdd = memd(r29+#u5:3) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && + MCI.getOperand(2).isImm() && + isShiftedUInt<5, 3>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_L2; + } + break; + + case Hexagon::L4_return: + + case Hexagon::L2_deallocframe: + + return HexagonII::HSIG_L2; + case Hexagon::EH_RETURN_JMPR: + + case Hexagon::J2_jumpr: + case Hexagon::JMPret: + // jumpr r31 + // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>. 
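[Editorial sketch: these candidate checks lean on llvm::isShiftedUInt<N, S>: an immediate qualifies when it is an unsigned N-bit value scaled by 2^S. An equivalent spelled-out check:]

  #include <cstdint>

  static bool isShiftedUIntNS(uint64_t X, unsigned N, unsigned S) {
    return X % (uint64_t(1) << S) == 0 && X < (uint64_t(1) << (N + S));
  }
  // "Rd = memw(Rs+#u4:2)" needs isShiftedUIntNS(Imm, 4, 2):
  // a multiple of 4 strictly below 64.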
+ DstReg = MCI.getOperand(0).getReg(); + if (Hexagon::R31 == DstReg) { + return HexagonII::HSIG_L2; + } + break; + + case Hexagon::J2_jumprt: + case Hexagon::J2_jumprf: + case Hexagon::J2_jumprtnew: + case Hexagon::J2_jumprfnew: + case Hexagon::JMPrett: + case Hexagon::JMPretf: + case Hexagon::JMPrettnew: + case Hexagon::JMPretfnew: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPretfnewpt: + DstReg = MCI.getOperand(1).getReg(); + SrcReg = MCI.getOperand(0).getReg(); + // [if ([!]p0[.new])] jumpr r31 + if ((HexagonMCInstrInfo::isPredReg(SrcReg) && (Hexagon::P0 == SrcReg)) && + (Hexagon::R31 == DstReg)) { + return HexagonII::HSIG_L2; + } + break; + case Hexagon::L4_return_t: + + case Hexagon::L4_return_f: + + case Hexagon::L4_return_tnew_pnt: + + case Hexagon::L4_return_fnew_pnt: + + case Hexagon::L4_return_tnew_pt: + + case Hexagon::L4_return_fnew_pt: + // [if ([!]p0[.new])] dealloc_return + SrcReg = MCI.getOperand(0).getReg(); + if (Hexagon::P0 == SrcReg) { + return HexagonII::HSIG_L2; + } + break; + // + // Group S1: + // + // memw(Rs+#u4:2) = Rt + // memb(Rs+#u4:0) = Rt + case Hexagon::S2_storeri_io: + // Special case this one from Group S2. + // memw(r29+#u5:2) = Rt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntReg(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + Hexagon::R29 == Src1Reg && MCI.getOperand(1).isImm() && + isShiftedUInt<5, 2>(MCI.getOperand(1).getImm())) { + return HexagonII::HSIG_S2; + } + // memw(Rs+#u4:2) = Rt + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + MCI.getOperand(1).isImm() && + isShiftedUInt<4, 2>(MCI.getOperand(1).getImm())) { + return HexagonII::HSIG_S1; + } + break; + case Hexagon::S2_storerb_io: + // memb(Rs+#u4:0) = Rt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm())) { + return HexagonII::HSIG_S1; + } + break; + // + // Group S2: + // + // memh(Rs+#u3:1) = Rt + // memw(r29+#u5:2) = Rt + // memd(r29+#s6:3) = Rtt + // memw(Rs+#u4:2) = #U1 + // memb(Rs+#u4) = #U1 + // allocframe(#u5:3) + case Hexagon::S2_storerh_io: + // memh(Rs+#u3:1) = Rt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && + MCI.getOperand(1).isImm() && + isShiftedUInt<3, 1>(MCI.getOperand(1).getImm())) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S2_storerd_io: + // memd(r29+#s6:3) = Rtt + Src1Reg = MCI.getOperand(0).getReg(); + Src2Reg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(Src2Reg) && + HexagonMCInstrInfo::isIntReg(Src1Reg) && Hexagon::R29 == Src1Reg && + MCI.getOperand(1).isImm() && + isShiftedInt<6, 3>(MCI.getOperand(1).getImm())) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S4_storeiri_io: + // memw(Rs+#u4:2) = #U1 + Src1Reg = MCI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && + MCI.getOperand(1).isImm() && + isShiftedUInt<4, 2>(MCI.getOperand(1).getImm()) && + MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) { + return HexagonII::HSIG_S2; + } + break; + case Hexagon::S4_storeirb_io: + // memb(Rs+#u4) = #U1 + Src1Reg = MCI.getOperand(0).getReg(); + if 
(HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
+        MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm()) &&
+        MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) {
+      return HexagonII::HSIG_S2;
+    }
+    break;
+  case Hexagon::S2_allocframe:
+    if (MCI.getOperand(0).isImm() &&
+        isShiftedUInt<5, 3>(MCI.getOperand(0).getImm())) {
+      return HexagonII::HSIG_S2;
+    }
+    break;
+  //
+  // Group A:
+  //
+  // Rx = add(Rx,#s7)
+  // Rd = Rs
+  // Rd = #u6
+  // Rd = #-1
+  // if ([!]P0[.new]) Rd = #0
+  // Rd = add(r29,#u6:2)
+  // Rx = add(Rx,Rs)
+  // P0 = cmp.eq(Rs,#u2)
+  // Rdd = combine(#0,Rs)
+  // Rdd = combine(Rs,#0)
+  // Rdd = combine(#u2,#U2)
+  // Rd = add(Rs,#1)
+  // Rd = add(Rs,#-1)
+  // Rd = sxth/sxtb/zxtb/zxth(Rs)
+  // Rd = and(Rs,#1)
+  case Hexagon::A2_addi:
+    DstReg = MCI.getOperand(0).getReg();
+    SrcReg = MCI.getOperand(1).getReg();
+    if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
+      // Rd = add(r29,#u6:2)
+      if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg &&
+          MCI.getOperand(2).isImm() &&
+          isShiftedUInt<6, 2>(MCI.getOperand(2).getImm())) {
+        return HexagonII::HSIG_A;
+      }
+      // Rx = add(Rx,#s7)
+      if (DstReg == SrcReg) {
+        return HexagonII::HSIG_A;
+      }
+      // Rd = add(Rs,#1)
+      // Rd = add(Rs,#-1)
+      if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+          MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) ||
+                                        (MCI.getOperand(2).getImm() == -1))) {
+        return HexagonII::HSIG_A;
+      }
+    }
+    break;
+  case Hexagon::A2_add:
+    // Rx = add(Rx,Rs)
+    DstReg = MCI.getOperand(0).getReg();
+    Src1Reg = MCI.getOperand(1).getReg();
+    Src2Reg = MCI.getOperand(2).getReg();
+    if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) &&
+        HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg)) {
+      return HexagonII::HSIG_A;
+    }
+    break;
+  case Hexagon::A2_andir:
+    DstReg = MCI.getOperand(0).getReg();
+    SrcReg = MCI.getOperand(1).getReg();
+    if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+        HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+        MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) ||
+                                      (MCI.getOperand(2).getImm() == 255))) {
+      return HexagonII::HSIG_A;
+    }
+    break;
+  case Hexagon::A2_tfr:
+    // Rd = Rs
+    DstReg = MCI.getOperand(0).getReg();
+    SrcReg = MCI.getOperand(1).getReg();
+    if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+        HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) {
+      return HexagonII::HSIG_A;
+    }
+    break;
+  case Hexagon::A2_tfrsi:
+    DstReg = MCI.getOperand(0).getReg();
+
+    if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
+      return HexagonII::HSIG_A;
+    }
+    break;
+  case Hexagon::C2_cmoveit:
+  case Hexagon::C2_cmovenewit:
+  case Hexagon::C2_cmoveif:
+  case Hexagon::C2_cmovenewif:
+    // if ([!]P0[.new]) Rd = #0
+    // Actual form:
+    // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>;
+    DstReg = MCI.getOperand(0).getReg();  // Rd
+    PredReg = MCI.getOperand(1).getReg(); // P0
+    if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
+        Hexagon::P0 == PredReg && MCI.getOperand(2).isImm() &&
+        MCI.getOperand(2).getImm() == 0) {
+      return HexagonII::HSIG_A;
+    }
+    break;
+  case Hexagon::C2_cmpeqi:
+    // P0 = cmp.eq(Rs,#u2)
+    DstReg = MCI.getOperand(0).getReg();
+    SrcReg = MCI.getOperand(1).getReg();
+    if (Hexagon::P0 == DstReg &&
+        HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
+        MCI.getOperand(2).isImm() && isUInt<2>(MCI.getOperand(2).getImm())) {
+      return HexagonII::HSIG_A;
+    }
+    break;
+  case Hexagon::A2_combineii:
+  case Hexagon::A4_combineii:
+    // Rdd =
combine(#u2,#U2) + DstReg = MCI.getOperand(0).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + // TODO: Handle Globals/Symbols + (MCI.getOperand(1).isImm() && isUInt<2>(MCI.getOperand(1).getImm())) && + ((MCI.getOperand(2).isImm() && + isUInt<2>(MCI.getOperand(2).getImm())))) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A4_combineri: + // Rdd = combine(Rs,#0) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + (MCI.getOperand(2).isImm() && MCI.getOperand(2).getImm() == 0)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A4_combineir: + // Rdd = combine(#0,Rs) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(2).getReg(); + if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && + (MCI.getOperand(1).isImm() && MCI.getOperand(1).getImm() == 0)) { + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + // Rd = sxth/sxtb/zxtb/zxth(Rs) + DstReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && + HexagonMCInstrInfo::isIntRegForSubInst(SrcReg)) { + return HexagonII::HSIG_A; + } + break; + } + + return HexagonII::HSIG_None; +} + +bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) { + + unsigned DstReg, SrcReg; + + switch (potentialDuplex.getOpcode()) { + case Hexagon::A2_addi: + // testing for case of: Rx = add(Rx,#s7) + DstReg = potentialDuplex.getOperand(0).getReg(); + SrcReg = potentialDuplex.getOperand(1).getReg(); + if (DstReg == SrcReg && HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + if (potentialDuplex.getOperand(2).isExpr()) + return true; + if (potentialDuplex.getOperand(2).isImm() && + !(isShiftedInt<7, 0>(potentialDuplex.getOperand(2).getImm()))) + return true; + } + break; + case Hexagon::A2_tfrsi: + DstReg = potentialDuplex.getOperand(0).getReg(); + + if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { + if (potentialDuplex.getOperand(1).isExpr()) + return true; + // Check for case of Rd = #-1. + if (potentialDuplex.getOperand(1).isImm() && + (potentialDuplex.getOperand(1).getImm() == -1)) + return false; + // Check for case of Rd = #u6. + if (potentialDuplex.getOperand(1).isImm() && + !isShiftedUInt<6, 0>(potentialDuplex.getOperand(1).getImm())) + return true; + } + break; + default: + break; + } + return false; +} + +/// non-Symmetrical. See if these two instructions are fit for duplex pair. +bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII, + MCInst const &MIa, bool ExtendedA, + MCInst const &MIb, bool ExtendedB, + bool bisReversable) { + // Slot 1 cannot be extended in duplexes PRM 10.5 + if (ExtendedA) + return false; + // Only A2_addi and A2_tfrsi can be extended in duplex form PRM 10.5 + if (ExtendedB) { + unsigned Opcode = MIb.getOpcode(); + if ((Opcode != Hexagon::A2_addi) && (Opcode != Hexagon::A2_tfrsi)) + return false; + } + unsigned MIaG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIa), + MIbG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIb); + + // If a duplex contains 2 insns in the same group, the insns must be + // ordered such that the numerically smaller opcode is in slot 1. 
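In sketch form, the ordering rule that the next block enforces (illustrative only; subinstOpcodeMap below is the real translation table, and this helper is hypothetical):

    // Once both opcodes are translated through subinstOpcodeMap, a
    // same-group duplex is kept only if slot 0's mapped opcode is not
    // numerically smaller than slot 1's.
    static bool slotOrderOK(unsigned MappedSlot0, unsigned MappedSlot1) {
      return MappedSlot0 >= MappedSlot1;
    }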
+ if ((MIaG != HexagonII::HSIG_None) && (MIaG == MIbG) && bisReversable) { + MCInst SubInst0 = HexagonMCInstrInfo::deriveSubInst(MIa); + MCInst SubInst1 = HexagonMCInstrInfo::deriveSubInst(MIb); + + unsigned zeroedSubInstS0 = + subinstOpcodeMap.find(SubInst0.getOpcode())->second; + unsigned zeroedSubInstS1 = + subinstOpcodeMap.find(SubInst1.getOpcode())->second; + + if (zeroedSubInstS0 < zeroedSubInstS1) + // subinstS0 (maps to slot 0) must be greater than + // subinstS1 (maps to slot 1) + return false; + } + + // allocframe must always be in slot 0 + if (MIb.getOpcode() == Hexagon::S2_allocframe) + return false; + + if ((MIaG != HexagonII::HSIG_None) && (MIbG != HexagonII::HSIG_None)) { + // Prevent 2 instructions with extenders from duplexing + // Note that MIb (slot1) can be extended and MIa (slot0) + // can never be extended + if (subInstWouldBeExtended(MIa)) + return false; + + // If duplexing produces an extender, but the original did not + // have an extender, do not duplex. + if (subInstWouldBeExtended(MIb) && !ExtendedB) + return false; + } + + // If jumpr r31 appears, it must be in slot 0, and never slot 1 (MIb). + if (MIbG == HexagonII::HSIG_L2) { + if ((MIb.getNumOperands() > 1) && MIb.getOperand(1).isReg() && + (MIb.getOperand(1).getReg() == Hexagon::R31)) + return false; + if ((MIb.getNumOperands() > 0) && MIb.getOperand(0).isReg() && + (MIb.getOperand(0).getReg() == Hexagon::R31)) + return false; + } + + // If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb); + // therefore, not duplexable if slot 1 is a store, and slot 0 is not. + if ((MIbG == HexagonII::HSIG_S1) || (MIbG == HexagonII::HSIG_S2)) { + if ((MIaG != HexagonII::HSIG_S1) && (MIaG != HexagonII::HSIG_S2)) + return false; + } + + return (isDuplexPairMatch(MIaG, MIbG)); +} + +/// Symmetrical. See if these two instructions are fit for duplex pair. 
+bool HexagonMCInstrInfo::isDuplexPair(MCInst const &MIa, MCInst const &MIb) { + unsigned MIaG = getDuplexCandidateGroup(MIa), + MIbG = getDuplexCandidateGroup(MIb); + return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG)); +} + +inline static void addOps(MCInst &subInstPtr, MCInst const &Inst, + unsigned opNum) { + if (Inst.getOperand(opNum).isReg()) { + switch (Inst.getOperand(opNum).getReg()) { + default: + llvm_unreachable("Not Duplexable Register"); + break; + case Hexagon::R0: + case Hexagon::R1: + case Hexagon::R2: + case Hexagon::R3: + case Hexagon::R4: + case Hexagon::R5: + case Hexagon::R6: + case Hexagon::R7: + case Hexagon::D0: + case Hexagon::D1: + case Hexagon::D2: + case Hexagon::D3: + case Hexagon::R16: + case Hexagon::R17: + case Hexagon::R18: + case Hexagon::R19: + case Hexagon::R20: + case Hexagon::R21: + case Hexagon::R22: + case Hexagon::R23: + case Hexagon::D8: + case Hexagon::D9: + case Hexagon::D10: + case Hexagon::D11: + subInstPtr.addOperand(Inst.getOperand(opNum)); + break; + } + } else + subInstPtr.addOperand(Inst.getOperand(opNum)); +} + +MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { + MCInst Result; + switch (Inst.getOpcode()) { + default: + // dbgs() << "opcode: "<< Inst->getOpcode() << "\n"; + llvm_unreachable("Unimplemented subinstruction \n"); + break; + case Hexagon::A2_addi: + if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == 1) { + Result.setOpcode(Hexagon::V4_SA1_inc); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; + } // 1,2 SUBInst $Rd = add($Rs, #1) + else if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == -1) { + Result.setOpcode(Hexagon::V4_SA1_dec); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; + } // 1,2 SUBInst $Rd = add($Rs,#-1) + else if (Inst.getOperand(1).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::V4_SA1_addsp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; + } // 1,3 SUBInst $Rd = add(r29, #$u6_2) + else { + Result.setOpcode(Hexagon::V4_SA1_addi); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; + } // 1,2,3 SUBInst $Rx = add($Rx, #$s7) + case Hexagon::A2_add: + Result.setOpcode(Hexagon::V4_SA1_addrx); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rx = add($_src_, $Rs) + case Hexagon::S2_allocframe: + Result.setOpcode(Hexagon::V4_SS2_allocframe); + addOps(Result, Inst, 0); + break; // 1 SUBInst allocframe(#$u5_3) + case Hexagon::A2_andir: + if (Inst.getOperand(2).getImm() == 255) { + Result.setOpcode(Hexagon::V4_SA1_zxtb); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 $Rd = and($Rs, #255) + } else { + Result.setOpcode(Hexagon::V4_SA1_and1); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = and($Rs, #1) + } + case Hexagon::C2_cmpeqi: + Result.setOpcode(Hexagon::V4_SA1_cmpeqi); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2) + case Hexagon::A4_combineii: + case Hexagon::A2_combineii: + if (Inst.getOperand(1).getImm() == 1) { + Result.setOpcode(Hexagon::V4_SA1_combine1i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#1, #$u2) + } + + if (Inst.getOperand(1).getImm() == 3) { + Result.setOpcode(Hexagon::V4_SA1_combine3i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#3, #$u2) + } + if (Inst.getOperand(1).getImm() == 
0) { + Result.setOpcode(Hexagon::V4_SA1_combine0i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#0, #$u2) + } + if (Inst.getOperand(1).getImm() == 2) { + Result.setOpcode(Hexagon::V4_SA1_combine2i); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#2, #$u2) + } + case Hexagon::A4_combineir: + Result.setOpcode(Hexagon::V4_SA1_combinezr); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = combine(#0, $Rs) + + case Hexagon::A4_combineri: + Result.setOpcode(Hexagon::V4_SA1_combinerz); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rdd = combine($Rs, #0) + case Hexagon::L4_return_tnew_pnt: + case Hexagon::L4_return_tnew_pt: + Result.setOpcode(Hexagon::V4_SL2_return_tnew); + break; // none SUBInst if (p0.new) dealloc_return:nt + case Hexagon::L4_return_fnew_pnt: + case Hexagon::L4_return_fnew_pt: + Result.setOpcode(Hexagon::V4_SL2_return_fnew); + break; // none SUBInst if (!p0.new) dealloc_return:nt + case Hexagon::L4_return_f: + Result.setOpcode(Hexagon::V4_SL2_return_f); + break; // none SUBInst if (!p0) dealloc_return + case Hexagon::L4_return_t: + Result.setOpcode(Hexagon::V4_SL2_return_t); + break; // none SUBInst if (p0) dealloc_return + case Hexagon::L4_return: + Result.setOpcode(Hexagon::V4_SL2_return); + break; // none SUBInst dealloc_return + case Hexagon::L2_deallocframe: + Result.setOpcode(Hexagon::V4_SL2_deallocframe); + break; // none SUBInst deallocframe + case Hexagon::EH_RETURN_JMPR: + case Hexagon::J2_jumpr: + case Hexagon::JMPret: + Result.setOpcode(Hexagon::V4_SL2_jumpr31); + break; // none SUBInst jumpr r31 + case Hexagon::J2_jumprf: + case Hexagon::JMPretf: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_f); + break; // none SUBInst if (!p0) jumpr r31 + case Hexagon::J2_jumprfnew: + case Hexagon::JMPretfnewpt: + case Hexagon::JMPretfnew: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_fnew); + break; // none SUBInst if (!p0.new) jumpr:nt r31 + case Hexagon::J2_jumprt: + case Hexagon::JMPrett: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_t); + break; // none SUBInst if (p0) jumpr r31 + case Hexagon::J2_jumprtnew: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPrettnew: + Result.setOpcode(Hexagon::V4_SL2_jumpr31_tnew); + break; // none SUBInst if (p0.new) jumpr:nt r31 + case Hexagon::L2_loadrb_io: + Result.setOpcode(Hexagon::V4_SL2_loadrb_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memb($Rs + #$u3_0) + case Hexagon::L2_loadrd_io: + Result.setOpcode(Hexagon::V4_SL2_loadrd_sp); + addOps(Result, Inst, 0); + addOps(Result, Inst, 2); + break; // 1,3 SUBInst $Rdd = memd(r29 + #$u5_3) + case Hexagon::L2_loadrh_io: + Result.setOpcode(Hexagon::V4_SL2_loadrh_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memh($Rs + #$u3_1) + case Hexagon::L2_loadrub_io: + Result.setOpcode(Hexagon::V4_SL1_loadrub_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memub($Rs + #$u4_0) + case Hexagon::L2_loadruh_io: + Result.setOpcode(Hexagon::V4_SL2_loadruh_io); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + addOps(Result, Inst, 2); + break; // 1,2,3 SUBInst $Rd = memuh($Rs + #$u3_1) + case Hexagon::L2_loadri_io: + if (Inst.getOperand(1).getReg() == Hexagon::R29) { + Result.setOpcode(Hexagon::V4_SL2_loadri_sp); + addOps(Result, Inst, 0); + 
addOps(Result, Inst, 2);
+      break; // 2 1,3 SUBInst $Rd = memw(r29 + #$u5_2)
+    } else {
+      Result.setOpcode(Hexagon::V4_SL1_loadri_io);
+      addOps(Result, Inst, 0);
+      addOps(Result, Inst, 1);
+      addOps(Result, Inst, 2);
+      break; // 1,2,3 SUBInst $Rd = memw($Rs + #$u4_2)
+    }
+  case Hexagon::S4_storeirb_io:
+    if (Inst.getOperand(2).getImm() == 0) {
+      Result.setOpcode(Hexagon::V4_SS2_storebi0);
+      addOps(Result, Inst, 0);
+      addOps(Result, Inst, 1);
+      break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0
+    } else if (Inst.getOperand(2).getImm() == 1) {
+      Result.setOpcode(Hexagon::V4_SS2_storebi1);
+      addOps(Result, Inst, 0);
+      addOps(Result, Inst, 1);
+      break; // 2 1,2 SUBInst memb($Rs + #$u4_0)=#1
+    }
+  case Hexagon::S2_storerb_io:
+    Result.setOpcode(Hexagon::V4_SS1_storeb_io);
+    addOps(Result, Inst, 0);
+    addOps(Result, Inst, 1);
+    addOps(Result, Inst, 2);
+    break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt
+  case Hexagon::S2_storerd_io:
+    Result.setOpcode(Hexagon::V4_SS2_stored_sp);
+    addOps(Result, Inst, 1);
+    addOps(Result, Inst, 2);
+    break; // 2,3 SUBInst memd(r29 + #$s6_3) = $Rtt
+  case Hexagon::S2_storerh_io:
+    Result.setOpcode(Hexagon::V4_SS2_storeh_io);
+    addOps(Result, Inst, 0);
+    addOps(Result, Inst, 1);
+    addOps(Result, Inst, 2);
+    break; // 1,2,3 SUBInst memh($Rs + #$u3_1) = $Rt
+  case Hexagon::S4_storeiri_io:
+    if (Inst.getOperand(2).getImm() == 0) {
+      Result.setOpcode(Hexagon::V4_SS2_storewi0);
+      addOps(Result, Inst, 0);
+      addOps(Result, Inst, 1);
+      break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#0
+    } else if (Inst.getOperand(2).getImm() == 1) {
+      Result.setOpcode(Hexagon::V4_SS2_storewi1);
+      addOps(Result, Inst, 0);
+      addOps(Result, Inst, 1);
+      break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#1
+    } else if (Inst.getOperand(0).getReg() == Hexagon::R29) {
+      Result.setOpcode(Hexagon::V4_SS2_storew_sp);
+      addOps(Result, Inst, 1);
+      addOps(Result, Inst, 2);
+      break; // 1 2,3 SUBInst memw(r29 + #$u5_2) = $Rt
+    }
+  case Hexagon::S2_storeri_io:
+    if (Inst.getOperand(0).getReg() == Hexagon::R29) {
+      Result.setOpcode(Hexagon::V4_SS2_storew_sp);
+      addOps(Result, Inst, 1);
+      addOps(Result, Inst, 2); // 1,2,3 SUBInst memw(sp + #$u5_2) = $Rt
+    } else {
+      Result.setOpcode(Hexagon::V4_SS1_storew_io);
+      addOps(Result, Inst, 0);
+      addOps(Result, Inst, 1);
+      addOps(Result, Inst, 2); // 1,2,3 SUBInst memw($Rs + #$u4_2) = $Rt
+    }
+    break;
+  case Hexagon::A2_sxtb:
+    Result.setOpcode(Hexagon::V4_SA1_sxtb);
+    addOps(Result, Inst, 0);
+    addOps(Result, Inst, 1);
+    break; // 1,2 SUBInst $Rd = sxtb($Rs)
+  case Hexagon::A2_sxth:
+    Result.setOpcode(Hexagon::V4_SA1_sxth);
+    addOps(Result, Inst, 0);
+    addOps(Result, Inst, 1);
+    break; // 1,2 SUBInst $Rd = sxth($Rs)
+  case Hexagon::A2_tfr:
+    Result.setOpcode(Hexagon::V4_SA1_tfr);
+    addOps(Result, Inst, 0);
+    addOps(Result, Inst, 1);
+    break; // 1,2 SUBInst $Rd = $Rs
+  case Hexagon::C2_cmovenewif:
+    Result.setOpcode(Hexagon::V4_SA1_clrfnew);
+    addOps(Result, Inst, 0);
+    break; // 2 SUBInst if (!p0.new) $Rd = #0
+  case Hexagon::C2_cmovenewit:
+    Result.setOpcode(Hexagon::V4_SA1_clrtnew);
+    addOps(Result, Inst, 0);
+    break; // 2 SUBInst if (p0.new) $Rd = #0
+  case Hexagon::C2_cmoveif:
+    Result.setOpcode(Hexagon::V4_SA1_clrf);
+    addOps(Result, Inst, 0);
+    break; // 2 SUBInst if (!p0) $Rd = #0
+  case Hexagon::C2_cmoveit:
+    Result.setOpcode(Hexagon::V4_SA1_clrt);
+    addOps(Result, Inst, 0);
+    break; // 2 SUBInst if (p0) $Rd = #0
+  case Hexagon::A2_tfrsi:
+    if (Inst.getOperand(1).isImm() && Inst.getOperand(1).getImm() == -1) {
+      Result.setOpcode(Hexagon::V4_SA1_setin1);
+      addOps(Result, Inst,
0); + break; // 2 1 SUBInst $Rd = #-1 + } else { + Result.setOpcode(Hexagon::V4_SA1_seti); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = #$u6 + } + case Hexagon::A2_zxtb: + Result.setOpcode(Hexagon::V4_SA1_zxtb); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 $Rd = and($Rs, #255) + + case Hexagon::A2_zxth: + Result.setOpcode(Hexagon::V4_SA1_zxth); + addOps(Result, Inst, 0); + addOps(Result, Inst, 1); + break; // 1,2 SUBInst $Rd = zxth($Rs) + } + return Result; +} + +static bool isStoreInst(unsigned opCode) { + switch (opCode) { + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerd_io: + case Hexagon::S4_storeiri_io: + case Hexagon::S4_storeirb_io: + case Hexagon::S2_allocframe: + return true; + default: + return false; + } +} + +SmallVector<DuplexCandidate, 8> +HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII, + MCInst const &MCB) { + assert(isBundle(MCB)); + SmallVector<DuplexCandidate, 8> duplexToTry; + // Use an "order matters" version of isDuplexPair. + unsigned numInstrInPacket = MCB.getNumOperands(); + + for (unsigned distance = 1; distance < numInstrInPacket; ++distance) { + for (unsigned j = HexagonMCInstrInfo::bundleInstructionsOffset, + k = j + distance; + (j < numInstrInPacket) && (k < numInstrInPacket); ++j, ++k) { + + // Check if reversable. + bool bisReversable = true; + if (isStoreInst(MCB.getOperand(j).getInst()->getOpcode()) && + isStoreInst(MCB.getOperand(k).getInst()->getOpcode())) { + DEBUG(dbgs() << "skip out of order write pair: " << k << "," << j + << "\n"); + bisReversable = false; + } + + // Try in order. + if (isOrderedDuplexPair( + MCII, *MCB.getOperand(k).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1), + *MCB.getOperand(j).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1), + bisReversable)) { + // Get iClass. + unsigned iClass = iClassOfDuplexPair( + getDuplexCandidateGroup(*MCB.getOperand(k).getInst()), + getDuplexCandidateGroup(*MCB.getOperand(j).getInst())); + + // Save off pairs for duplex checking. + duplexToTry.push_back(DuplexCandidate(j, k, iClass)); + DEBUG(dbgs() << "adding pair: " << j << "," << k << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + continue; + } else { + DEBUG(dbgs() << "skipping pair: " << j << "," << k << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + } + + // Try reverse. + if (bisReversable) { + if (isOrderedDuplexPair( + MCII, *MCB.getOperand(j).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1), + *MCB.getOperand(k).getInst(), + HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1), + bisReversable)) { + // Get iClass. + unsigned iClass = iClassOfDuplexPair( + getDuplexCandidateGroup(*MCB.getOperand(j).getInst()), + getDuplexCandidateGroup(*MCB.getOperand(k).getInst())); + + // Save off pairs for duplex checking. 
+ duplexToTry.push_back(DuplexCandidate(k, j, iClass)); + DEBUG(dbgs() << "adding pair:" << k << "," << j << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + } else { + DEBUG(dbgs() << "skipping pair: " << k << "," << j << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + } + } + } + } + return duplexToTry; +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 93c7a0d98bf2..2731278f0e41 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -12,12 +12,53 @@ //===----------------------------------------------------------------------===// #include "HexagonMCInstrInfo.h" + +#include "Hexagon.h" #include "HexagonBaseInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" + namespace llvm { -void HexagonMCInstrInfo::AppendImplicitOperands(MCInst &MCI) { - MCI.addOperand(MCOperand::createImm(0)); - MCI.addOperand(MCOperand::createInst(nullptr)); +iterator_range<MCInst::const_iterator> +HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) { + assert(isBundle(MCI)); + return iterator_range<MCInst::const_iterator>( + MCI.begin() + bundleInstructionsOffset, MCI.end()); +} + +size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) { + if (HexagonMCInstrInfo::isBundle(MCI)) + return (MCI.size() - bundleInstructionsOffset); + else + return (1); +} + +MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass, + MCInst const &inst0, + MCInst const &inst1) { + assert((iClass <= 0xf) && "iClass must have range of 0 to 0xf"); + MCInst *duplexInst = new (Context) MCInst; + duplexInst->setOpcode(Hexagon::DuplexIClass0 + iClass); + + MCInst *SubInst0 = new (Context) MCInst(deriveSubInst(inst0)); + MCInst *SubInst1 = new (Context) MCInst(deriveSubInst(inst1)); + duplexInst->addOperand(MCOperand::createInst(SubInst0)); + duplexInst->addOperand(MCOperand::createInst(SubInst1)); + return duplexInst; +} + +MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB, + size_t Index) { + assert(Index <= bundleSize(MCB)); + if (Index == 0) + return nullptr; + MCInst const *Inst = + MCB.getOperand(Index + bundleInstructionsOffset - 1).getInst(); + if (isImmext(*Inst)) + return Inst; + return nullptr; } HexagonII::MemAccessSize @@ -46,6 +87,24 @@ MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII, return (MCII.get(MCI.getOpcode())); } +unsigned short HexagonMCInstrInfo::getExtendableOp(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); +} + +MCOperand const & +HexagonMCInstrInfo::getExtendableOperand(MCInstrInfo const &MCII, + MCInst const &MCI) { + unsigned O = HexagonMCInstrInfo::getExtendableOp(MCII, MCI); + MCOperand const &MO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isExtendable(MCII, MCI) || + HexagonMCInstrInfo::isExtended(MCII, MCI)) && + (MO.isImm() || MO.isExpr())); + return (MO); +} + unsigned HexagonMCInstrInfo::getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; @@ -58,12 +117,6 @@ unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII, return ((F >> HexagonII::ExtentBitsPos) 
& HexagonII::ExtentBitsMask); } -std::bitset<16> HexagonMCInstrInfo::GetImplicitBits(MCInst const &MCI) { - SanityCheckImplicitOperands(MCI); - std::bitset<16> Bits(MCI.getOperand(MCI.getNumOperands() - 2).getImm()); - return Bits; -} - // Return the max value that a constant extendable operand can have // without being extended. int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII, @@ -99,9 +152,14 @@ char const *HexagonMCInstrInfo::getName(MCInstrInfo const &MCII, return MCII.getName(MCI.getOpcode()); } -// Return the operand that consumes or produces a new value. -MCOperand const &HexagonMCInstrInfo::getNewValue(MCInstrInfo const &MCII, +unsigned short HexagonMCInstrInfo::getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask); +} + +MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII, + MCInst const &MCI) { uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; unsigned const O = (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask; @@ -113,6 +171,21 @@ MCOperand const &HexagonMCInstrInfo::getNewValue(MCInstrInfo const &MCII, return (MCO); } +int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + + HexagonII::SubTarget Target = static_cast<HexagonII::SubTarget>( + (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask); + + switch (Target) { + default: + return Hexagon::ArchV4; + case HexagonII::HasV5SubT: + return Hexagon::ArchV5; + } +} + // Return the Hexagon ISA class for the insn. unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII, MCInst const &MCI) { @@ -121,6 +194,32 @@ unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII, return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); } +unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, + MCInst const &MCI) { + + const InstrItinerary *II = STI.getSchedModel().InstrItineraries; + int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass(); + return ((II[SchedClass].FirstStage + HexagonStages)->getUnits()); +} + +bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) { + if (!HexagonMCInstrInfo::isBundle(MCI)) + return false; + + for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) { + auto MI = I.getInst(); + if (isImmext(*MI)) + return true; + } + + return false; +} + +bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) { + return extenderForIndex(MCB, Index) != nullptr; +} + // Return whether the instruction is a legal new-value producer. bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI) { @@ -128,6 +227,18 @@ bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII, return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); } +MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) { + assert(isBundle(MCB)); + assert(Index < HEXAGON_PACKET_SIZE); + return *MCB.getOperand(bundleInstructionsOffset + Index).getInst(); +} + +bool HexagonMCInstrInfo::isBundle(MCInst const &MCI) { + auto Result = Hexagon::BUNDLE == MCI.getOpcode(); + assert(!Result || (MCI.size() > 0 && MCI.getOperand(0).isImm())); + return Result; +} + // Return whether the insn is an actual insn. 
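The bundle accessors above fix a simple layout: operand 0 of a Hexagon::BUNDLE holds the flag immediate, and every following operand wraps one MCInst. A minimal sketch of walking that layout with the new iterator range (the counting helper is hypothetical; it mirrors hasImmExt):

    // Count the constant-extender words in a bundle; assumes the layout
    // established above (flags at operand 0, insns from offset 1 on).
    static size_t countImmExts(MCInst const &MCB) {
      assert(HexagonMCInstrInfo::isBundle(MCB));
      size_t N = 0;
      for (auto const &Op : HexagonMCInstrInfo::bundleInstructions(MCB))
        if (HexagonMCInstrInfo::isImmext(*Op.getInst()))
          ++N;
      return N;
    }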
bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) { return (!HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() && @@ -135,6 +246,15 @@ bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) { HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP); } +bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) { + return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) || + (Reg >= Hexagon::D8 && Reg <= Hexagon::D11)); +} + +bool HexagonMCInstrInfo::isDuplex(MCInstrInfo const &MCII, MCInst const &MCI) { + return HexagonII::TypeDUPLEX == HexagonMCInstrInfo::getType(MCII, MCI); +} + // Return whether the instruction needs to be constant extended. // 1) Always return true if the instruction has 'isExtended' flag set. // @@ -173,20 +293,44 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, return (ImmValue < MinValue || ImmValue > MaxValue); } -// Return true if the instruction may be extended based on the operand value. bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII, MCInst const &MCI) { uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; } -// Return whether the instruction must be always extended. bool HexagonMCInstrInfo::isExtended(MCInstrInfo const &MCII, MCInst const &MCI) { uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; } +bool HexagonMCInstrInfo::isFloat(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::FPPos) & HexagonII::FPMask); +} + +bool HexagonMCInstrInfo::isImmext(MCInst const &MCI) { + auto Op = MCI.getOpcode(); + return (Op == Hexagon::A4_ext_b || Op == Hexagon::A4_ext_c || + Op == Hexagon::A4_ext_g || Op == Hexagon::A4_ext); +} + +bool HexagonMCInstrInfo::isInnerLoop(MCInst const &MCI) { + assert(isBundle(MCI)); + int64_t Flags = MCI.getOperand(0).getImm(); + return (Flags & innerLoopMask) != 0; +} + +bool HexagonMCInstrInfo::isIntReg(unsigned Reg) { + return (Reg >= Hexagon::R0 && Reg <= Hexagon::R31); +} + +bool HexagonMCInstrInfo::isIntRegForSubInst(unsigned Reg) { + return ((Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || + (Reg >= Hexagon::R16 && Reg <= Hexagon::R23)); +} + // Return whether the insn is a new-value consumer. 
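The in-range test in isConstExtended above comes down to signed/unsigned field arithmetic, which LLVM's templated predicates from MathExtras.h express directly. A standalone illustration assuming only that header (the helper names are hypothetical):

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // A #u6:2 operand is a 6-bit unsigned field scaled by 4: multiples
    // of 4 in [0, 252] need no constant-extender word.
    static bool fitsU6_2(int64_t Imm) { return llvm::isShiftedUInt<6, 2>(Imm); }

    // A #s7 operand is a plain 7-bit signed field: [-64, 63].
    static bool fitsS7(int64_t Imm) { return llvm::isShiftedInt<7, 0>(Imm); }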
bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII, MCInst const &MCI) { @@ -203,46 +347,103 @@ bool HexagonMCInstrInfo::isOperandExtended(MCInstrInfo const &MCII, OperandNum; } -bool HexagonMCInstrInfo::isPacketBegin(MCInst const &MCI) { - std::bitset<16> Bits(GetImplicitBits(MCI)); - return Bits.test(packetBeginIndex); +bool HexagonMCInstrInfo::isOuterLoop(MCInst const &MCI) { + assert(isBundle(MCI)); + int64_t Flags = MCI.getOperand(0).getImm(); + return (Flags & outerLoopMask) != 0; } -bool HexagonMCInstrInfo::isPacketEnd(MCInst const &MCI) { - std::bitset<16> Bits(GetImplicitBits(MCI)); - return Bits.test(packetEndIndex); +bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); +} + +bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ( + !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask)); +} + +bool HexagonMCInstrInfo::isPredReg(unsigned Reg) { + return (Reg >= Hexagon::P0 && Reg <= Hexagon::P3_0); } -// Return whether the insn is a prefix. bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) { return (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypePREFIX); } -// Return whether the insn is solo, i.e., cannot be in a packet. bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask); } -void HexagonMCInstrInfo::resetPacket(MCInst &MCI) { - setPacketBegin(MCI, false); - setPacketEnd(MCI, false); +bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask); +} + +bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask); +} + +void HexagonMCInstrInfo::padEndloop(MCInst &MCB) { + MCInst Nop; + Nop.setOpcode(Hexagon::A2_nop); + assert(isBundle(MCB)); + while ((HexagonMCInstrInfo::isInnerLoop(MCB) && + (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) || + ((HexagonMCInstrInfo::isOuterLoop(MCB) && + (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE)))) + MCB.addOperand(MCOperand::createInst(new MCInst(Nop))); +} + +bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII, + MCInst const &MCI) { + if (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) + return false; + + unsigned SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123: + case Hexagon::Sched::ALU64_tc_2_SLOT23: + case Hexagon::Sched::ALU64_tc_3x_SLOT23: + case Hexagon::Sched::M_tc_2_SLOT23: + case Hexagon::Sched::M_tc_3x_SLOT23: + case Hexagon::Sched::S_2op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_3x_SLOT23: + return true; + } + return false; } -void HexagonMCInstrInfo::SetImplicitBits(MCInst &MCI, std::bitset<16> Bits) { - SanityCheckImplicitOperands(MCI); - MCI.getOperand(MCI.getNumOperands() - 2).setImm(Bits.to_ulong()); +void 
HexagonMCInstrInfo::replaceDuplex(MCContext &Context, MCInst &MCB,
+                                  DuplexCandidate Candidate) {
+  assert(Candidate.packetIndexI < MCB.size());
+  assert(Candidate.packetIndexJ < MCB.size());
+  assert(isBundle(MCB));
+  MCInst *Duplex =
+      deriveDuplex(Context, Candidate.iClass,
+                   *MCB.getOperand(Candidate.packetIndexJ).getInst(),
+                   *MCB.getOperand(Candidate.packetIndexI).getInst());
+  assert(Duplex != nullptr);
+  MCB.getOperand(Candidate.packetIndexI).setInst(Duplex);
+  MCB.erase(MCB.begin() + Candidate.packetIndexJ);
 }

-void HexagonMCInstrInfo::setPacketBegin(MCInst &MCI, bool f) {
-  std::bitset<16> Bits(GetImplicitBits(MCI));
-  Bits.set(packetBeginIndex, f);
-  SetImplicitBits(MCI, Bits);
+void HexagonMCInstrInfo::setInnerLoop(MCInst &MCI) {
+  assert(isBundle(MCI));
+  MCOperand &Operand = MCI.getOperand(0);
+  Operand.setImm(Operand.getImm() | innerLoopMask);
 }

-void HexagonMCInstrInfo::setPacketEnd(MCInst &MCI, bool f) {
-  std::bitset<16> Bits(GetImplicitBits(MCI));
-  Bits.set(packetEndIndex, f);
-  SetImplicitBits(MCI, Bits);
+void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
+  assert(isBundle(MCI));
+  MCOperand &Operand = MCI.getOperand(0);
+  Operand.setImm(Operand.getImm() | outerLoopMask);
 }
 }
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 082c80d5ac05..09f305f638e2 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -1,4 +1,4 @@
-//===- HexagonMCInstrInfo.cpp - Hexagon sub-class of MCInst ---------------===//
+//===- HexagonMCInstrInfo.cpp - Utility functions on Hexagon MCInsts ------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -15,20 +15,47 @@
 #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H

 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-
-#include <bitset>

 namespace llvm {
+class MCContext;
 class MCInstrDesc;
 class MCInstrInfo;
 class MCInst;
 class MCOperand;
+class MCSubtargetInfo;
 namespace HexagonII {
 enum class MemAccessSize;
 }
+class DuplexCandidate {
+public:
+  unsigned packetIndexI, packetIndexJ, iClass;
+  DuplexCandidate(unsigned i, unsigned j, unsigned iClass)
+      : packetIndexI(i), packetIndexJ(j), iClass(iClass) {}
+};
 namespace HexagonMCInstrInfo {
-void AppendImplicitOperands(MCInst &MCI);
+size_t const innerLoopOffset = 0;
+int64_t const innerLoopMask = 1 << innerLoopOffset;
+
+size_t const outerLoopOffset = 1;
+int64_t const outerLoopMask = 1 << outerLoopOffset;
+
+size_t const bundleInstructionsOffset = 1;
+
+// Returns the number of instructions in the bundle
+size_t bundleSize(MCInst const &MCI);
+
+// Returns an iterator range of instructions in this bundle
+iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI);
+
+// Return the extender for instruction at Index or nullptr if none
+MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
+
+// Create a duplex instruction given the two subinsts
+MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
+                     MCInst const &inst1);
+
+// Convert this instruction into a duplex subinst
+MCInst deriveSubInst(MCInst const &Inst);

 // Return memory access size
 HexagonII::MemAccessSize getAccessSize(MCInstrInfo const &MCII,
@@ -42,14 +69,26 @@ unsigned short getCExtOpNum(MCInstrInfo const &MCII, MCInst const &MCI);

 MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI);

+// Return which duplex group this instruction belongs to
+unsigned getDuplexCandidateGroup(MCInst const &MI);
+
+// Return a list of all possible instruction duplex combinations
+SmallVector<DuplexCandidate, 8> getDuplexPossibilties(MCInstrInfo const &MCII,
+                                                      MCInst const &MCB);
+
+// Return the index of the extendable operand
+unsigned short getExtendableOp(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Return a reference to the extendable operand
+MCOperand const &getExtendableOperand(MCInstrInfo const &MCII,
+                                      MCInst const &MCI);
+
 // Return the implicit alignment of the extendable operand
 unsigned getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI);

 // Return the number of logical bits of the extendable operand
 unsigned getExtentBits(MCInstrInfo const &MCII, MCInst const &MCI);

-std::bitset<16> GetImplicitBits(MCInst const &MCI);
-
 // Return the max value that a constant extendable operand can have
 // without being extended.
 int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -61,27 +100,77 @@ int getMinValue(MCInstrInfo const &MCII, MCInst const &MCI);

 // Return instruction name
 char const *getName(MCInstrInfo const &MCII, MCInst const &MCI);

+// Return the operand index for the new value.
+unsigned short getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI);
+
 // Return the operand that consumes or produces a new value.
-MCOperand const &getNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI);
+
+int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI);

 // Return the Hexagon ISA class for the insn.
 unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI);

+/// Return the slots used by the insn.
+unsigned getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+                  MCInst const &MCI);
+
+// Does the packet have an extender for the instruction at Index
+bool hasExtenderForIndex(MCInst const &MCB, size_t Index);
+
+bool hasImmExt(MCInst const &MCI);
+
 // Return whether the instruction is a legal new-value producer.
 bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);

+// Return the instruction at Index
+MCInst const &instruction(MCInst const &MCB, size_t Index);
+
+// Returns whether this MCInst is a well-formed bundle
+bool isBundle(MCInst const &MCI);
+
 // Return whether the insn is an actual insn.
 bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);

+// Return the duplex iclass given the two duplex classes
+unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb);
+
 // Return whether the instruction needs to be constant extended.
 bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);

+// Is this double register suitable for use in a duplex subinst
+bool isDblRegForSubInst(unsigned Reg);
+
+// Is this a duplex instruction
+bool isDuplex(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Can these instructions be duplexed
+bool isDuplexPair(MCInst const &MIa, MCInst const &MIb);
+
+// Can these duplex classes be combined into a duplex instruction
+bool isDuplexPairMatch(unsigned Ga, unsigned Gb);
+
 // Return true if the insn may be extended based on the operand value.
 bool isExtendable(MCInstrInfo const &MCII, MCInst const &MCI);

 // Return whether the instruction must be always extended.
 bool isExtended(MCInstrInfo const &MCII, MCInst const &MCI);

+/// Return whether it is a floating-point insn.
+bool isFloat(MCInstrInfo const &MCII, MCInst const &MCI); + +// Returns whether this instruction is an immediate extender +bool isImmext(MCInst const &MCI); + +// Returns whether this bundle is an endloop0 +bool isInnerLoop(MCInst const &MCI); + +// Is this an integer register +bool isIntReg(unsigned Reg); + +// Is this register suitable for use in a duplex subinst +bool isIntRegForSubInst(unsigned Reg); + // Return whether the insn is a new-value consumer. bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI); @@ -89,9 +178,22 @@ bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI); bool isOperandExtended(MCInstrInfo const &MCII, MCInst const &MCI, unsigned short OperandNum); -bool isPacketBegin(MCInst const &MCI); +// Can these two instructions be duplexed +bool isOrderedDuplexPair(MCInstrInfo const &MCII, MCInst const &MIa, + bool ExtendedA, MCInst const &MIb, bool ExtendedB, + bool bisReversable); + +// Returns whether this bundle is an endloop1 +bool isOuterLoop(MCInst const &MCI); + +// Return whether this instruction is predicated +bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI); + +// Return whether the predicate sense is true +bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI); -bool isPacketEnd(MCInst const &MCI); +// Is this a predicate register +bool isPredReg(unsigned Reg); // Return whether the insn is a prefix. bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI); @@ -99,23 +201,31 @@ bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI); // Return whether the insn is solo, i.e., cannot be in a packet. bool isSolo(MCInstrInfo const &MCII, MCInst const &MCI); -static const size_t packetBeginIndex = 0; -static const size_t packetEndIndex = 1; +/// Return whether the insn can be packaged only with A and X-type insns. +bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI); -void resetPacket(MCInst &MCI); +/// Return whether the insn can be packaged only with an A-type insn in slot #1. 
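The register predicates declared above (isIntReg, isIntRegForSubInst, isDblRegForSubInst) encode a hardware limit: a duplex sub-instruction has only a four-bit register field, which addresses r0-r7 and r16-r23 (the double-register variant covers D0-D3 and D8-D11 overlaying the same range). A standalone sketch over architectural register numbers (the helper is hypothetical):

    // Duplex subinsts can name only sixteen GPRs: r0-r7 and r16-r23.
    static bool inDuplexGprRange(unsigned RegNo) { // RegNo is 0..31
      return RegNo <= 7 || (RegNo >= 16 && RegNo <= 23);
    }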
+bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI); -inline void SanityCheckImplicitOperands(MCInst const &MCI) { - assert(MCI.getNumOperands() >= 2 && "At least the two implicit operands"); - assert(MCI.getOperand(MCI.getNumOperands() - 1).isInst() && - "Implicit bits and flags"); - assert(MCI.getOperand(MCI.getNumOperands() - 2).isImm() && "Parent pointer"); -} +// Pad the bundle with nops to satisfy endloop requirements +void padEndloop(MCInst &MCI); + +bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI); + +// Replace the instructions inside MCB, represented by Candidate +void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate); + +// Marks a bundle as endloop0 +void setInnerLoop(MCInst &MCI); -void SetImplicitBits(MCInst &MCI, std::bitset<16> Bits); +// Marks a bundle as endloop1 +void setOuterLoop(MCInst &MCI); -void setPacketBegin(MCInst &MCI, bool Y); +// Would duplexing this instruction create a requirement to extend +bool subInstWouldBeExtended(MCInst const &potentialDuplex); -void setPacketEnd(MCInst &MCI, bool Y); +// Attempt to find and replace compound pairs +void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI); } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp new file mode 100644 index 000000000000..8e70280c1a0d --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -0,0 +1,237 @@ +//===----- HexagonMCShuffler.cpp - MC bundle shuffling --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-shuffle" + +#include "Hexagon.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> + DisableShuffle("disable-hexagon-shuffle", cl::Hidden, cl::init(false), + cl::desc("Disable Hexagon instruction shuffling")); + +void HexagonMCShuffler::init(MCInst &MCB) { + if (HexagonMCInstrInfo::isBundle(MCB)) { + MCInst const *Extender = nullptr; + // Copy the bundle for the shuffling. + for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo()); + MCInst *MI = const_cast<MCInst *>(I.getInst()); + + if (!HexagonMCInstrInfo::isImmext(*MI)) { + append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI), + false); + Extender = nullptr; + } else + Extender = MI; + } + } + + BundleFlags = MCB.getOperand(0).getImm(); +} + +void HexagonMCShuffler::init(MCInst &MCB, MCInst const *AddMI, + bool bInsertAtFront) { + if (HexagonMCInstrInfo::isBundle(MCB)) { + if (bInsertAtFront && AddMI) + append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI), + false); + MCInst const *Extender = nullptr; + // Copy the bundle for the shuffling. 
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo()); + MCInst *MI = const_cast<MCInst *>(I.getInst()); + if (!HexagonMCInstrInfo::isImmext(*MI)) { + append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI), + false); + Extender = nullptr; + } else + Extender = MI; + } + if (!bInsertAtFront && AddMI) + append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI), + false); + } + + BundleFlags = MCB.getOperand(0).getImm(); +} + +void HexagonMCShuffler::copyTo(MCInst &MCB) { + MCB.clear(); + MCB.addOperand(MCOperand::createImm(BundleFlags)); + // Copy the results into the bundle. + for (HexagonShuffler::iterator I = begin(); I != end(); ++I) { + + MCInst const *MI = I->getDesc(); + MCInst const *Extender = I->getExtender(); + if (Extender) + MCB.addOperand(MCOperand::createInst(Extender)); + MCB.addOperand(MCOperand::createInst(MI)); + } +} + +bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) { + if (shuffle()) { + // Copy the results into the bundle. + copyTo(MCB); + } else + DEBUG(MCB.dump()); + + return (!getError()); +} + +bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB) { + HexagonMCShuffler MCS(MCII, STI, MCB); + + if (DisableShuffle) + // Ignore if user chose so. + return false; + + if (!HexagonMCInstrInfo::bundleSize(MCB)) { + // There once was a bundle: + // BUNDLE %D2<imp-def>, %R4<imp-def>, %R5<imp-def>, %D7<imp-def>, ... + // * %D2<def> = IMPLICIT_DEF; flags: + // * %D7<def> = IMPLICIT_DEF; flags: + // After the IMPLICIT_DEFs were removed by the asm printer, the bundle + // became empty. + DEBUG(dbgs() << "Skipping empty bundle"); + return false; + } else if (!HexagonMCInstrInfo::isBundle(MCB)) { + DEBUG(dbgs() << "Skipping stand-alone insn"); + return false; + } + + // Reorder the bundle and copy the result. + if (!MCS.reshuffleTo(MCB)) { + // Unless there is any error, which should not happen at this point. + unsigned shuffleError = MCS.getError(); + switch (shuffleError) { + default: + llvm_unreachable("unknown error"); + case HexagonShuffler::SHUFFLE_ERROR_INVALID: + llvm_unreachable("invalid packet"); + case HexagonShuffler::SHUFFLE_ERROR_STORES: + llvm_unreachable("too many stores"); + case HexagonShuffler::SHUFFLE_ERROR_LOADS: + llvm_unreachable("too many loads"); + case HexagonShuffler::SHUFFLE_ERROR_BRANCHES: + llvm_unreachable("too many branches"); + case HexagonShuffler::SHUFFLE_ERROR_NOSLOTS: + llvm_unreachable("no suitable slot"); + case HexagonShuffler::SHUFFLE_ERROR_SLOTS: + llvm_unreachable("over-subscribed slots"); + case HexagonShuffler::SHUFFLE_SUCCESS: // Single instruction case. + return true; + } + } + + return true; +} + +unsigned +llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCContext &Context, MCInst &MCB, + SmallVector<DuplexCandidate, 8> possibleDuplexes) { + + if (DisableShuffle) + return HexagonShuffler::SHUFFLE_SUCCESS; + + if (!HexagonMCInstrInfo::bundleSize(MCB)) { + // There once was a bundle: + // BUNDLE %D2<imp-def>, %R4<imp-def>, %R5<imp-def>, %D7<imp-def>, ... + // * %D2<def> = IMPLICIT_DEF; flags: + // * %D7<def> = IMPLICIT_DEF; flags: + // After the IMPLICIT_DEFs were removed by the asm printer, the bundle + // became empty. 
+ DEBUG(dbgs() << "Skipping empty bundle"); + return HexagonShuffler::SHUFFLE_SUCCESS; + } else if (!HexagonMCInstrInfo::isBundle(MCB)) { + DEBUG(dbgs() << "Skipping stand-alone insn"); + return HexagonShuffler::SHUFFLE_SUCCESS; + } + + bool doneShuffling = false; + unsigned shuffleError; + while (possibleDuplexes.size() > 0 && (!doneShuffling)) { + // case of Duplex Found + DuplexCandidate duplexToTry = possibleDuplexes.pop_back_val(); + MCInst Attempt(MCB); + HexagonMCInstrInfo::replaceDuplex(Context, Attempt, duplexToTry); + HexagonMCShuffler MCS(MCII, STI, Attempt); // copy packet to the shuffler + if (MCS.size() == 1) { // case of one duplex + // copy the created duplex in the shuffler to the bundle + MCS.copyTo(MCB); + doneShuffling = true; + return HexagonShuffler::SHUFFLE_SUCCESS; + } + // try shuffle with this duplex + doneShuffling = MCS.reshuffleTo(MCB); + shuffleError = MCS.getError(); + + if (doneShuffling) + break; + } + + if (doneShuffling == false) { + HexagonMCShuffler MCS(MCII, STI, MCB); + doneShuffling = MCS.reshuffleTo(MCB); // shuffle + shuffleError = MCS.getError(); + } + if (!doneShuffling) + return shuffleError; + + return HexagonShuffler::SHUFFLE_SUCCESS; +} + +bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB, MCInst const *AddMI, int fixupCount) { + if (!HexagonMCInstrInfo::isBundle(MCB) || !AddMI) + return false; + + // if fixups present, make sure we don't insert too many nops that would + // later prevent an extender from being inserted. + unsigned int bundleSize = HexagonMCInstrInfo::bundleSize(MCB); + if (bundleSize >= HEXAGON_PACKET_SIZE) + return false; + if (fixupCount >= 2) { + return false; + } else { + if (bundleSize == HEXAGON_PACKET_SIZE - 1 && fixupCount) + return false; + } + + if (DisableShuffle) + return false; + + HexagonMCShuffler MCS(MCII, STI, MCB, AddMI); + if (!MCS.reshuffleTo(MCB)) { + unsigned shuffleError = MCS.getError(); + switch (shuffleError) { + default: + return false; + case HexagonShuffler::SHUFFLE_SUCCESS: // single instruction case + return true; + } + } + + return true; +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h new file mode 100644 index 000000000000..a21cce1fc240 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h @@ -0,0 +1,65 @@ +//=-- HexagonMCShuffler.h ---------------------------------------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONMCSHUFFLER_H +#define HEXAGONMCSHUFFLER_H + +#include "MCTargetDesc/HexagonShuffler.h" + +namespace llvm { + +class MCInst; + +// Insn bundle shuffler. 
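One detail worth noting in the duplex-trying loop above: replaceDuplex mutates its bundle, so each candidate is applied to a scratch copy (Attempt) and the original MCB is only written through reshuffleTo on success. Condensed into a hedged sketch (the wrapper is hypothetical; the calls are the ones used above):

    static bool tryOneCandidate(MCInstrInfo const &MCII,
                                MCSubtargetInfo const &STI, MCContext &Context,
                                MCInst &MCB, DuplexCandidate D) {
      MCInst Attempt(MCB); // scratch copy: a failed attempt leaves MCB intact
      HexagonMCInstrInfo::replaceDuplex(Context, Attempt, D);
      HexagonMCShuffler MCS(MCII, STI, Attempt);
      return MCS.reshuffleTo(MCB); // copies into MCB only when legal
    }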
+class HexagonMCShuffler : public HexagonShuffler { + bool immext_present; + bool duplex_present; + +public: + HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB) + : HexagonShuffler(MCII, STI) { + init(MCB); + }; + HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB, const MCInst *AddMI, + bool bInsertAtFront = false) + : HexagonShuffler(MCII, STI) { + init(MCB, AddMI, bInsertAtFront); + }; + + // Copy reordered bundle to another. + void copyTo(MCInst &MCB); + // Reorder and copy result to another. + bool reshuffleTo(MCInst &MCB); + + bool immextPresent() const { return immext_present; }; + bool duplexPresent() const { return duplex_present; }; + +private: + void init(MCInst &MCB); + void init(MCInst &MCB, const MCInst *AddMI, bool bInsertAtFront = false); +}; + +// Invocation of the shuffler. +bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &); +bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &, const MCInst *, int); +unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCContext &Context, MCInst &, + SmallVector<DuplexCandidate, 8>); +} + +#endif // HEXAGONMCSHUFFLER_H diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 59395e230fa9..43734ed6ca3f 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -55,7 +55,7 @@ createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { } static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { + const Triple &TT) { MCAsmInfo *MAI = new HexagonMCAsmInfo(TT); // VirtualFP = (R30 + #0). @@ -112,11 +112,11 @@ extern "C" void LLVMInitializeHexagonTargetMC() { TargetRegistry::RegisterMCCodeEmitter(TheHexagonTarget, createHexagonMCCodeEmitter); - // Register the MC Inst Printer - TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, - createHexagonMCInstPrinter); - // Register the asm backend TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget, createHexagonAsmBackend); + + // Register the MC Inst Printer + TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, + createHexagonMCInstPrinter); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index de63fd271aea..81211cc026db 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -17,6 +17,8 @@ #include <cstdint> namespace llvm { +struct InstrItinerary; +struct InstrStage; class MCAsmBackend; class MCCodeEmitter; class MCContext; @@ -31,6 +33,8 @@ class raw_pwrite_stream; extern Target TheHexagonTarget; +extern const InstrStage HexagonStages[]; + MCInstrInfo *createHexagonMCInstrInfo(); MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp new file mode 100644 index 000000000000..feaaa4f780d5 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -0,0 +1,385 @@ +//===----- HexagonShuffler.cpp - Instruction bundle shuffling -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the shuffling of insns inside a bundle according to the
+// packet formation rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-shuffle"
+
+#include <algorithm>
+#include <utility>
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "HexagonShuffler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Insn shuffling priority.
+class HexagonBid {
+  // The priority is directly proportional to how restricted the insn is based
+  // on its flexibility to run on the available slots. So, the fewer slots it
+  // may run on, the higher its priority.
+  enum { MAX = 360360 }; // LCD of 1/2, 1/3, 1/4,... 1/15.
+  unsigned Bid;
+
+public:
+  HexagonBid() : Bid(0){};
+  HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; };
+
+  // Check if the insn priority is overflowed.
+  bool isSold() const { return (Bid >= MAX); };
+
+  HexagonBid &operator+=(const HexagonBid &B) {
+    Bid += B.Bid;
+    return *this;
+  };
+};
+
+// Slot shuffling allocation.
+class HexagonUnitAuction {
+  HexagonBid Scores[HEXAGON_PACKET_SIZE];
+  // Mask indicating which slot is unavailable.
+  unsigned isSold : HEXAGON_PACKET_SIZE;
+
+public:
+  HexagonUnitAuction() : isSold(0){};
+
+  // Allocate slots.
+  bool bid(unsigned B) {
+    // Exclude already auctioned slots from the bid.
+    unsigned b = B & ~isSold;
+    if (b) {
+      for (unsigned i = 0; i < HEXAGON_PACKET_SIZE; ++i)
+        if (b & (1 << i)) {
+          // Request candidate slots.
+          Scores[i] += HexagonBid(b);
+          isSold |= Scores[i].isSold() << i;
+        }
+      return true;
+    } else
+      // Error if the desired slots are already full.
+      return false;
+  };
+};
+
+unsigned HexagonResource::setWeight(unsigned s) {
+  const unsigned SlotWeight = 8;
+  const unsigned MaskWeight = SlotWeight - 1;
+  bool Key = (1 << s) & getUnits();
+
+  // Calculate relative weight of the insn for the given slot, weighing it the
+  // heavier the more restrictive the insn is and the lower the slots that the
+  // insn may be executed in.
+  Weight =
+      (Key << (SlotWeight * s)) * ((MaskWeight - countPopulation(getUnits()))
+                                   << countTrailingZeros(getUnits()));
+  return (Weight);
+}
+
+HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
+                                 MCSubtargetInfo const &STI)
+    : MCII(MCII), STI(STI) {
+  reset();
+}
+
+void HexagonShuffler::reset() {
+  Packet.clear();
+  BundleFlags = 0;
+  Error = SHUFFLE_SUCCESS;
+}
+
+void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender,
+                             unsigned S, bool X) {
+  HexagonInstr PI(ID, Extender, S, X);
+
+  Packet.push_back(PI);
+}
+
+/// Check that the packet is legal and enforce relative insn order.
+bool HexagonShuffler::check() {
+  // Descriptive slot masks.
+  const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2,
+                 slotThree = 0x8, slotFirstJump = 0x8, slotLastJump = 0x4,
+                 slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1;
+  // Highest slots for branches and stores used to keep their original order.
+  unsigned slotJump = slotFirstJump;
+  unsigned slotLoadStore = slotFirstLoadStore;
+  // Number of branches, solo branches, indirect branches.
+  unsigned jumps = 0, jump1 = 0, jumpr = 0;
+  // Number of memory operations, loads, solo loads, stores, solo stores, single
+  // stores.
+  unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
+  // Number of duplex insns, solo insns.
+  unsigned duplex = 0, solo = 0;
+  // Number of insns restricting other insns in the packet to A and X types,
+  // and the number of insns that are neither A nor X type.
+  unsigned onlyAX = 0, neitherAnorX = 0;
+  // Number of insns restricting other insns in slot #1 to A type.
+  unsigned onlyAin1 = 0;
+  // Number of insns restricting any insn in slot #1, except A2_nop.
+  unsigned onlyNo1 = 0;
+  unsigned xtypeFloat = 0;
+  unsigned pSlot3Cnt = 0;
+  iterator slot3ISJ = end();
+
+  // Collect information from the insns in the packet.
+  for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
+    MCInst const *ID = ISJ->getDesc();
+
+    if (HexagonMCInstrInfo::isSolo(MCII, *ID))
+      solo += !ISJ->isSoloException();
+    else if (HexagonMCInstrInfo::isSoloAX(MCII, *ID))
+      onlyAX += !ISJ->isSoloException();
+    else if (HexagonMCInstrInfo::isSoloAin1(MCII, *ID))
+      onlyAin1 += !ISJ->isSoloException();
+    if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32 &&
+        HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeXTYPE)
+      ++neitherAnorX;
+    if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) {
+      ++pSlot3Cnt;
+      slot3ISJ = ISJ;
+    }
+
+    switch (HexagonMCInstrInfo::getType(MCII, *ID)) {
+    case HexagonII::TypeXTYPE:
+      if (HexagonMCInstrInfo::isFloat(MCII, *ID))
+        ++xtypeFloat;
+      break;
+    case HexagonII::TypeJR:
+      ++jumpr;
+    // Fall-through.
+    case HexagonII::TypeJ:
+      ++jumps;
+      break;
+    case HexagonII::TypeLD:
+      ++loads;
+      ++memory;
+      if (ISJ->Core.getUnits() == slotSingleLoad)
+        ++load0;
+      if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn())
+        ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
+      break;
+    case HexagonII::TypeST:
+      ++stores;
+      ++memory;
+      if (ISJ->Core.getUnits() == slotSingleStore)
+        ++store0;
+      break;
+    case HexagonII::TypeMEMOP:
+      ++loads;
+      ++stores;
+      ++store1;
+      ++memory;
+      break;
+    case HexagonII::TypeNV:
+      ++memory; // NV insns are memory-like.
+      if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch())
+        ++jumps, ++jump1;
+      break;
+    case HexagonII::TypeCR:
+    // Legacy conditional branch predicated on a register.
+    case HexagonII::TypeSYSTEM:
+      if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad())
+        ++loads;
+      break;
+    }
+  }
+
+  // Check if the packet is legal.
+  if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory)) ||
+      (solo && size() > 1) || (onlyAX && neitherAnorX > 1) ||
+      (onlyAX && xtypeFloat)) {
+    Error = SHUFFLE_ERROR_INVALID;
+    return false;
+  }
+
+  if (jump1 && jumps > 1) {
+    // Error if single branch with another branch.
+    Error = SHUFFLE_ERROR_BRANCHES;
+    return false;
+  }
+
+  // Modify packet accordingly.
+  // TODO: need to reserve slots #0 and #1 for duplex insns.
+  bool bOnlySlot3 = false;
+  for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
+    MCInst const *ID = ISJ->getDesc();
+
+    if (!ISJ->Core.getUnits()) {
+      // Error if insn may not be executed in any slot.
+      Error = SHUFFLE_ERROR_UNKNOWN;
+      return false;
+    }
+
+    // Exclude from slot #1 any insn but A2_nop.
+    if (HexagonMCInstrInfo::getDesc(MCII, *ID).getOpcode() != Hexagon::A2_nop)
+      if (onlyNo1)
+        ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne);
+
+    // Exclude from slot #1 any insn but A-type.
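// (Illustrative aside, not from this patch: slotOne is the mask 0x2, so the
// exclusions just above and below take a fully flexible unit mask of 0b1111,
// any of the four slots, down to 0b1101, i.e. everything except slot #1.)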
+    if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32)
+      if (onlyAin1)
+        ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne);
+
+    // Branches must keep the original order.
+    if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch() ||
+        HexagonMCInstrInfo::getDesc(MCII, *ID).isCall())
+      if (jumps > 1) {
+        if (jumpr || slotJump < slotLastJump) {
+          // Error if indirect branch with another branch or
+          // no more slots available for branches.
+          Error = SHUFFLE_ERROR_BRANCHES;
+          return false;
+        }
+        // Pin the branch to the highest slot available to it.
+        ISJ->Core.setUnits(ISJ->Core.getUnits() & slotJump);
+        // Update next highest slot available to branches.
+        slotJump >>= 1;
+      }
+
+    // A single load must use slot #0.
+    if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) {
+      if (loads == 1 && loads == memory)
+        // Pin the load to slot #0.
+        ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad);
+    }
+
+    // A single store must use slot #0.
+    if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayStore()) {
+      if (!store0) {
+        if (stores == 1)
+          ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore);
+        else if (stores > 1) {
+          if (slotLoadStore < slotLastLoadStore) {
+            // Error if no more slots available for stores.
+            Error = SHUFFLE_ERROR_STORES;
+            return false;
+          }
+          // Pin the store to the highest slot available to it.
+          ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
+          // Update the next highest slot available to stores.
+          slotLoadStore >>= 1;
+        }
+      }
+      if (store1 && stores > 1) {
+        // Error if a single store with another store.
+        Error = SHUFFLE_ERROR_STORES;
+        return false;
+      }
+    }
+
+    // Flag if an instruction can only be executed in slot #3.
+    if (ISJ->Core.getUnits() == slotThree)
+      bOnlySlot3 = true;
+
+    if (!ISJ->Core.getUnits()) {
+      // Error if insn may not be executed in any slot.
+      Error = SHUFFLE_ERROR_NOSLOTS;
+      return false;
+    }
+  }
+
+  bool validateSlots = true;
+  if (!bOnlySlot3 && pSlot3Cnt == 1 && slot3ISJ != end()) {
+    // Save off the slot mask of the instruction marked with A_PREFER_SLOT3,
+    // then pin it to slot #3.
+    unsigned saveUnits = slot3ISJ->Core.getUnits();
+    slot3ISJ->Core.setUnits(saveUnits & slotThree);
+
+    HexagonUnitAuction AuctionCore;
+    std::sort(begin(), end(), HexagonInstr::lessCore);
+
+    // See if the auction still succeeds with that instruction pinned to
+    // slot #3.
+    bool bFail = false;
+    for (iterator I = begin(); I != end() && !bFail; ++I)
+      if (!AuctionCore.bid(I->Core.getUnits()))
+        bFail = true;
+
+    // If it does, keep the pin; if not, restore the original slot mask.
+    if (!bFail)
+      validateSlots = false; // All good; no need to re-do the auction.
+    else
+      for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
+        MCInst const *ID = ISJ->getDesc();
+        if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID))
+          ISJ->Core.setUnits(saveUnits);
+      }
+  }
+
+  // Check if any slot, core, is over-subscribed.
+  // Verify the core slot subscriptions.
+  if (validateSlots) {
+    HexagonUnitAuction AuctionCore;
+
+    std::sort(begin(), end(), HexagonInstr::lessCore);
+
+    for (iterator I = begin(); I != end(); ++I)
+      if (!AuctionCore.bid(I->Core.getUnits())) {
+        Error = SHUFFLE_ERROR_SLOTS;
+        return false;
+      }
+  }
+
+  Error = SHUFFLE_SUCCESS;
+  return true;
+}
+
+bool HexagonShuffler::shuffle() {
+  if (size() > HEXAGON_PACKET_SIZE) {
+    // Ignore a packet with more insns than a packet can hold
+    // or with compound or duplex insns for now.
+    Error = SHUFFLE_ERROR_INVALID;
+    return false;
+  }
+
+  // Check and prepare packet.
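// (Worked example, not from this patch: setWeight() makes constrained insns
// sort first in the reordering pass below. With SlotWeight = 8 and
// MaskWeight = 7, an insn usable only in slot #0 (units 0b0001) gets a
// per-slot factor of (7 - 1) << 0 = 6, while an any-slot insn (units 0b1111)
// gets (7 - 4) << 0 = 3, so the slot-#0-only insn wins slot #0.)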
+ if (size() > 1 && check()) + // Reorder the handles for each slot. + for (unsigned nSlot = 0, emptySlots = 0; nSlot < HEXAGON_PACKET_SIZE; + ++nSlot) { + iterator ISJ, ISK; + unsigned slotSkip, slotWeight; + + // Prioritize the handles considering their restrictions. + for (ISJ = ISK = Packet.begin(), slotSkip = slotWeight = 0; + ISK != Packet.end(); ++ISK, ++slotSkip) + if (slotSkip < nSlot - emptySlots) + // Note which handle to begin at. + ++ISJ; + else + // Calculate the weight of the slot. + slotWeight += ISK->Core.setWeight(HEXAGON_PACKET_SIZE - nSlot - 1); + + if (slotWeight) + // Sort the packet, favoring source order, + // beginning after the previous slot. + std::sort(ISJ, Packet.end()); + else + // Skip unused slot. + ++emptySlots; + } + + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) + DEBUG(dbgs().write_hex(ISJ->Core.getUnits()); + dbgs() << ':' + << HexagonMCInstrInfo::getDesc(MCII, *ISJ->getDesc()) + .getOpcode(); + dbgs() << '\n'); + DEBUG(dbgs() << '\n'); + + return (!getError()); +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h new file mode 100644 index 000000000000..9218fd3eb070 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -0,0 +1,139 @@ +//===----- HexagonShuffler.h - Instruction bundle shuffling ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the shuffling of insns inside a bundle according to the +// packet formation rules of the Hexagon ISA. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONSHUFFLER_H +#define HEXAGONSHUFFLER_H + +#include "Hexagon.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInstrInfo.h" + +using namespace llvm; + +namespace llvm { +// Insn resources. +class HexagonResource { + // Mask of the slots or units that may execute the insn and + // the weight or priority that the insn requires to be assigned a slot. + unsigned Slots, Weight; + +public: + HexagonResource(unsigned s) { setUnits(s); }; + + void setUnits(unsigned s) { + Slots = s & ~(-1 << HEXAGON_PACKET_SIZE); + setWeight(s); + }; + unsigned setWeight(unsigned s); + + unsigned getUnits() const { return (Slots); }; + unsigned getWeight() const { return (Weight); }; + + // Check if the resources are in ascending slot order. + static bool lessUnits(const HexagonResource &A, const HexagonResource &B) { + return (countPopulation(A.getUnits()) < countPopulation(B.getUnits())); + }; + // Check if the resources are in ascending weight order. + static bool lessWeight(const HexagonResource &A, const HexagonResource &B) { + return (A.getWeight() < B.getWeight()); + }; +}; + +// Handle to an insn used by the shuffling algorithm. 
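// (Illustrative aside, not from this patch: operator< below reverses
// HexagonResource::lessWeight, so a plain std::sort over HexagonInstr handles
// yields a heaviest-first order; lessCore instead orders by ascending unit
// popcount, e.g. a slot-#0-only insn with units 0b0001 precedes an any-slot
// insn with units 0b1111.)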
+class HexagonInstr {
+  friend class HexagonShuffler;
+
+  MCInst const *ID;
+  MCInst const *Extender;
+  HexagonResource Core;
+  bool SoloException;
+
+public:
+  HexagonInstr(MCInst const *id, MCInst const *Extender, unsigned s,
+               bool x = false)
+      : ID(id), Extender(Extender), Core(s), SoloException(x){};
+
+  MCInst const *getDesc() const { return (ID); };
+
+  MCInst const *getExtender() const { return Extender; }
+
+  unsigned isSoloException() const { return (SoloException); };
+
+  // Check if the handles are in ascending order for shuffling purposes.
+  bool operator<(const HexagonInstr &B) const {
+    return (HexagonResource::lessWeight(B.Core, Core));
+  };
+  // Check if the handles are in ascending order by core slots.
+  static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) {
+    return (HexagonResource::lessUnits(A.Core, B.Core));
+  };
+};
+
+// Bundle shuffler.
+class HexagonShuffler {
+  typedef SmallVector<HexagonInstr, HEXAGON_PRESHUFFLE_PACKET_SIZE>
+      HexagonPacket;
+
+  // Insn handles in a bundle.
+  HexagonPacket Packet;
+
+  // Shuffling error code.
+  unsigned Error;
+
+protected:
+  int64_t BundleFlags;
+  MCInstrInfo const &MCII;
+  MCSubtargetInfo const &STI;
+
+public:
+  typedef HexagonPacket::iterator iterator;
+
+  enum {
+    SHUFFLE_SUCCESS = 0,    ///< Successful operation.
+    SHUFFLE_ERROR_INVALID,  ///< Invalid bundle.
+    SHUFFLE_ERROR_STORES,   ///< No free slots for store insns.
+    SHUFFLE_ERROR_LOADS,    ///< No free slots for load insns.
+    SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns.
+    SHUFFLE_ERROR_NOSLOTS,  ///< No free slots for other insns.
+    SHUFFLE_ERROR_SLOTS,    ///< Over-subscribed slots.
+    SHUFFLE_ERROR_UNKNOWN   ///< Unknown error.
+  };
+
+  explicit HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI);
+
+  // Reset to initial state.
+  void reset();
+  // Check if the bundle may be validly shuffled.
+  bool check();
+  // Reorder the insn handles in the bundle.
+  bool shuffle();
+
+  unsigned size() const { return (Packet.size()); };
+
+  iterator begin() { return (Packet.begin()); };
+  iterator end() { return (Packet.end()); };
+
+  // Add an insn handle to the bundle.
+  void append(MCInst const *ID, MCInst const *Extender, unsigned S,
+              bool X = false);
+
+  // Set or get the error code for the last check or shuffling of the bundle.
+ void setError(unsigned Err) { Error = Err; }; + unsigned getError() const { return (Error); }; +}; +} + +#endif // HEXAGONSHUFFLER_H diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp index 6c43d97837ca..be6d1a84a377 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp @@ -39,7 +39,7 @@ void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo, O << Op.getImm(); else { assert(Op.isExpr() && "unknown pcrel immediate operand"); - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } @@ -53,7 +53,8 @@ void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << '#' << Op.getImm(); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << '#' << *Op.getExpr(); + O << '#'; + Op.getExpr()->print(O, &MAI); } } @@ -75,7 +76,7 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo, O << '&'; if (Disp.isExpr()) - O << *Disp.getExpr(); + Disp.getExpr()->print(O, &MAI); else { assert(Disp.isImm() && "Expected immediate in displacement field"); O << Disp.getImm(); diff --git a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt index b8f3d02ab4d8..a305b2db8683 100644 --- a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = MSP430Desc parent = MSP430 -required_libraries = MC MSP430AsmPrinter MSP430Info +required_libraries = MC MSP430AsmPrinter MSP430Info Support add_to_library_groups = MSP430 diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp index df1aa1a41f19..c26b3081dbc3 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp @@ -12,12 +12,11 @@ //===----------------------------------------------------------------------===// #include "MSP430MCAsmInfo.h" -#include "llvm/ADT/StringRef.h" using namespace llvm; void MSP430MCAsmInfo::anchor() { } -MSP430MCAsmInfo::MSP430MCAsmInfo(StringRef TT) { +MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) { PointerSize = CalleeSaveStackSlotSize = 2; CommentString = ";"; diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h index 2c9532d321e4..ff5b0b6d858c 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h @@ -17,12 +17,12 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class StringRef; + class Triple; class MSP430MCAsmInfo : public MCAsmInfoELF { void anchor() override; public: - explicit MSP430MCAsmInfo(StringRef TT); + explicit MSP430MCAsmInfo(const Triple &TT); }; } // namespace llvm diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index a99c9a3e2374..4342c10a1bf2 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -75,7 +75,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << MO.getImm(); return; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: { bool isMemOp = Modifier && !strcmp(Modifier, "mem"); @@ -92,7 +92,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, if (Offset) O << '(' << Offset << '+'; - O 
<< *getSymbol(MO.getGlobal()); + getSymbol(MO.getGlobal())->print(O, MAI); if (Offset) O << ')'; diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp index b039778d96c4..54154a8afac1 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/MSP430MCInstLower.cpp @@ -96,7 +96,7 @@ MCOperand MSP430MCInstLower:: LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); switch (MO.getTargetFlags()) { default: llvm_unreachable("Unknown target flag on GV operand"); @@ -104,8 +104,8 @@ LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { } if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(MO.getOffset(), Ctx), + Expr = MCBinaryExpr::createAdd(Expr, + MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); return MCOperand::createExpr(Expr); } @@ -130,7 +130,7 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create( + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), Ctx)); break; case MachineOperand::MO_GlobalAddress: diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index aade12b3046a..9c054e5ac231 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -43,7 +43,7 @@ class MCInstrInfo; namespace { class MipsAssemblerOptions { public: - MipsAssemblerOptions(uint64_t Features_) : + MipsAssemblerOptions(const FeatureBitset &Features_) : ATReg(1), Reorder(true), Macro(true), Features(Features_) {} MipsAssemblerOptions(const MipsAssemblerOptions *Opts) { @@ -70,8 +70,8 @@ public: void setMacro() { Macro = true; } void setNoMacro() { Macro = false; } - uint64_t getFeatures() const { return Features; } - void setFeatures(uint64_t Features_) { Features = Features_; } + const FeatureBitset &getFeatures() const { return Features; } + void setFeatures(const FeatureBitset &Features_) { Features = Features_; } // Set of features that are either architecture features or referenced // by them (e.g.: FeatureNaN2008 implied by FeatureMips32r6). 
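The switch from uint64_t to FeatureBitset above lets the `.set push`/`.set pop` option stack save and restore the full subtarget feature set instead of the lossy computed-availability word. A minimal sketch of that stack discipline, with hypothetical names (OptionStack, onSetPush, onSetPop, applyFeatures) standing in for the parser's members:

#include <vector>
#include "llvm/MC/SubtargetFeature.h" // defines FeatureBitset

// Hypothetical stand-in for setFeatureBits() + setAvailableFeatures().
void applyFeatures(const llvm::FeatureBitset &Bits) {
  (void)Bits; // the real parser forwards this to STI and the matcher
}

std::vector<llvm::FeatureBitset> OptionStack; // one snapshot per ".set push"

void onSetPush(const llvm::FeatureBitset &Current) {
  OptionStack.push_back(Current); // save the live feature set
}

void onSetPop() {
  applyFeatures(OptionStack.back()); // restore the snapshot taken at push time
  OptionStack.pop_back();
}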
@@ -84,7 +84,7 @@ private: unsigned ATReg; bool Reorder; bool Macro; - uint64_t Features; + FeatureBitset Features; }; } @@ -247,6 +247,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetFpDirective(); bool parseSetPopDirective(); bool parseSetPushDirective(); + bool parseSetSoftFloatDirective(); + bool parseSetHardFloatDirective(); bool parseSetAssignment(); @@ -325,23 +327,23 @@ class MipsAsmParser : public MCTargetAsmParser { STI.setFeatureBits(FeatureBits); setAvailableFeatures( ComputeAvailableFeatures(STI.ToggleFeature(ArchFeature))); - AssemblerOptions.back()->setFeatures(getAvailableFeatures()); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); } void setFeatureBits(uint64_t Feature, StringRef FeatureString) { if (!(STI.getFeatureBits()[Feature])) { setAvailableFeatures( ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); } - AssemblerOptions.back()->setFeatures(getAvailableFeatures()); } void clearFeatureBits(uint64_t Feature, StringRef FeatureString) { if (STI.getFeatureBits()[Feature]) { setAvailableFeatures( ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); } - AssemblerOptions.back()->setFeatures(getAvailableFeatures()); } public: @@ -367,11 +369,11 @@ public: // Remember the initial assembler options. The user can not modify these. AssemblerOptions.push_back( - make_unique<MipsAssemblerOptions>(getAvailableFeatures())); + llvm::make_unique<MipsAssemblerOptions>(STI.getFeatureBits())); // Create an assembler options environment for the user to modify. AssemblerOptions.push_back( - make_unique<MipsAssemblerOptions>(getAvailableFeatures())); + llvm::make_unique<MipsAssemblerOptions>(STI.getFeatureBits())); getTargetStreamer().updateABIInfo(*this); @@ -1946,10 +1948,10 @@ void MipsAsmParser::expandLoadAddressSym( unsigned RegNo = DstRegOp.getReg(); const MCSymbolRefExpr *Symbol = cast<MCSymbolRefExpr>(SymOp.getExpr()); const MCSymbolRefExpr *HiExpr = - MCSymbolRefExpr::Create(Symbol->getSymbol().getName(), + MCSymbolRefExpr::create(Symbol->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI, getContext()); const MCSymbolRefExpr *LoExpr = - MCSymbolRefExpr::Create(Symbol->getSymbol().getName(), + MCSymbolRefExpr::create(Symbol->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_LO, getContext()); if (!Is32BitSym) { // If it's a 64-bit architecture, expand to: @@ -1960,10 +1962,10 @@ void MipsAsmParser::expandLoadAddressSym( // dsll d,d,16 // ori d,d,lo16(sym) const MCSymbolRefExpr *HighestExpr = - MCSymbolRefExpr::Create(Symbol->getSymbol().getName(), + MCSymbolRefExpr::create(Symbol->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_HIGHEST, getContext()); const MCSymbolRefExpr *HigherExpr = - MCSymbolRefExpr::Create(Symbol->getSymbol().getName(), + MCSymbolRefExpr::create(Symbol->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_HIGHER, getContext()); tmpInst.setOpcode(Mips::LUi); @@ -2102,7 +2104,7 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, else { if (ExprOffset->getKind() == MCExpr::SymbolRef) { SR = static_cast<const MCSymbolRefExpr *>(ExprOffset); - const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::create( SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI, getContext()); TempInst.addOperand(MCOperand::createExpr(HiExpr)); @@ -2133,7 +2135,7 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, 
TempInst.addOperand(MCOperand::createImm(LoOffset)); else { if (ExprOffset->getKind() == MCExpr::SymbolRef) { - const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create( SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_LO, getContext()); TempInst.addOperand(MCOperand::createExpr(LoExpr)); @@ -2505,7 +2507,7 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier); // Otherwise create a symbol reference. const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext()); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); Operands.push_back(MipsOperand::CreateImm(Res, S, E, *this)); return false; @@ -2565,14 +2567,14 @@ const MCExpr *MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr, default: report_fatal_error("unsupported reloc value"); } - return MCConstantExpr::Create(Val, getContext()); + return MCConstantExpr::create(Val, getContext()); } if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(Expr)) { // It's a symbol, create a symbolic expression from the symbol. StringRef Symbol = MSRE->getSymbol().getName(); MCSymbolRefExpr::VariantKind VK = getVariantKind(RelocStr); - Res = MCSymbolRefExpr::Create(Symbol, VK, getContext()); + Res = MCSymbolRefExpr::create(Symbol, VK, getContext()); return Res; } @@ -2581,17 +2583,17 @@ const MCExpr *MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr, // Try to create target expression. if (MipsMCExpr::isSupportedBinaryExpr(VK, BE)) - return MipsMCExpr::Create(VK, Expr, getContext()); + return MipsMCExpr::create(VK, Expr, getContext()); const MCExpr *LExp = evaluateRelocExpr(BE->getLHS(), RelocStr); const MCExpr *RExp = evaluateRelocExpr(BE->getRHS(), RelocStr); - Res = MCBinaryExpr::Create(BE->getOpcode(), LExp, RExp, getContext()); + Res = MCBinaryExpr::create(BE->getOpcode(), LExp, RExp, getContext()); return Res; } if (const MCUnaryExpr *UN = dyn_cast<MCUnaryExpr>(Expr)) { const MCExpr *UnExp = evaluateRelocExpr(UN->getSubExpr(), RelocStr); - Res = MCUnaryExpr::Create(UN->getOpcode(), UnExp, getContext()); + Res = MCUnaryExpr::create(UN->getOpcode(), UnExp, getContext()); return Res; } // Just return the original expression. @@ -2779,7 +2781,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) { Parser.Lex(); // Eat the ')' token. if (!IdVal) - IdVal = MCConstantExpr::Create(0, getContext()); + IdVal = MCConstantExpr::create(0, getContext()); // Replace the register operand with the memory operand. std::unique_ptr<MipsOperand> op( @@ -2790,10 +2792,10 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) { // Add the memory operand. 
if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(IdVal)) { int64_t Imm; - if (IdVal->EvaluateAsAbsolute(Imm)) - IdVal = MCConstantExpr::Create(Imm, getContext()); + if (IdVal->evaluateAsAbsolute(Imm)) + IdVal = MCConstantExpr::create(Imm, getContext()); else if (BE->getLHS()->getKind() != MCExpr::SymbolRef) - IdVal = MCBinaryExpr::Create(BE->getOpcode(), BE->getRHS(), BE->getLHS(), + IdVal = MCBinaryExpr::create(BE->getOpcode(), BE->getRHS(), BE->getLHS(), getContext()); } @@ -3010,7 +3012,7 @@ MipsAsmParser::parseInvNum(OperandVector &Operands) { int64_t Val = MCE->getValue(); SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(MipsOperand::CreateImm( - MCConstantExpr::Create(0 - Val, getContext()), S, E, *this)); + MCConstantExpr::create(0 - Val, getContext()), S, E, *this)); return MatchOperand_Success; } @@ -3034,7 +3036,7 @@ MipsAsmParser::parseLSAImm(OperandVector &Operands) { return MatchOperand_ParseFail; int64_t Val; - if (!Expr->EvaluateAsAbsolute(Val)) { + if (!Expr->evaluateAsAbsolute(Val)) { Error(S, "expected immediate value"); return MatchOperand_ParseFail; } @@ -3601,7 +3603,9 @@ bool MipsAsmParser::parseSetPopDirective() { return reportParseError(Loc, ".set pop with no .set push"); AssemblerOptions.pop_back(); - setAvailableFeatures(AssemblerOptions.back()->getFeatures()); + setAvailableFeatures( + ComputeAvailableFeatures(AssemblerOptions.back()->getFeatures())); + STI.setFeatureBits(AssemblerOptions.back()->getFeatures()); getTargetStreamer().emitDirectiveSetPop(); return false; @@ -3621,6 +3625,28 @@ bool MipsAsmParser::parseSetPushDirective() { return false; } +bool MipsAsmParser::parseSetSoftFloatDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return reportParseError("unexpected token, expected end of statement"); + + setFeatureBits(Mips::FeatureSoftFloat, "soft-float"); + getTargetStreamer().emitDirectiveSetSoftFloat(); + return false; +} + +bool MipsAsmParser::parseSetHardFloatDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return reportParseError("unexpected token, expected end of statement"); + + clearFeatureBits(Mips::FeatureSoftFloat, "soft-float"); + getTargetStreamer().emitDirectiveSetHardFloat(); + return false; +} + bool MipsAsmParser::parseSetAssignment() { StringRef Name; const MCExpr *Value; @@ -3649,7 +3675,9 @@ bool MipsAsmParser::parseSetMips0Directive() { return reportParseError("unexpected token, expected end of statement"); // Reset assembler options to their initial values. - setAvailableFeatures(AssemblerOptions.front()->getFeatures()); + setAvailableFeatures( + ComputeAvailableFeatures(AssemblerOptions.front()->getFeatures())); + STI.setFeatureBits(AssemblerOptions.front()->getFeatures()); AssemblerOptions.back()->setFeatures(AssemblerOptions.front()->getFeatures()); getTargetStreamer().emitDirectiveSetMips0(); @@ -3985,6 +4013,10 @@ bool MipsAsmParser::parseDirectiveSet() { return parseSetMsaDirective(); } else if (Tok.getString() == "nomsa") { return parseSetNoMsaDirective(); + } else if (Tok.getString() == "softfloat") { + return parseSetSoftFloatDirective(); + } else if (Tok.getString() == "hardfloat") { + return parseSetHardFloatDirective(); } else { // It is just an identifier, look for an assignment. 
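// (Illustrative aside, not from this patch: the two branches above make the
// parser accept ".set softfloat" and ".set hardfloat", which toggle
// Mips::FeatureSoftFloat through setFeatureBits()/clearFeatureBits() and
// emit the matching directive via the target streamer.)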
parseSetAssignment(); @@ -4286,7 +4318,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { reportParseError("expected number after comma"); return false; } - if (!DummyNumber->EvaluateAsAbsolute(DummyNumberVal)) { + if (!DummyNumber->evaluateAsAbsolute(DummyNumberVal)) { reportParseError("expected an absolute expression after comma"); return false; } @@ -4366,7 +4398,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { return false; } - if (!FrameSize->EvaluateAsAbsolute(FrameSizeVal)) { + if (!FrameSize->evaluateAsAbsolute(FrameSizeVal)) { reportParseError("frame size not an absolute expression"); return false; } @@ -4427,7 +4459,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { return false; } - if (!BitMask->EvaluateAsAbsolute(BitMaskVal)) { + if (!BitMask->evaluateAsAbsolute(BitMaskVal)) { reportParseError("bitmask not an absolute expression"); return false; } @@ -4448,7 +4480,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { return false; } - if (!FrameOffset->EvaluateAsAbsolute(FrameOffsetVal)) { + if (!FrameOffset->evaluateAsAbsolute(FrameOffsetVal)) { reportParseError("frame offset not an absolute expression"); return false; } diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index eb97c93ac196..c8629b5d7bd2 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -47,6 +47,8 @@ public: bool isGP64() const { return STI.getFeatureBits()[Mips::FeatureGP64Bit]; } + bool hasCnMips() const { return STI.getFeatureBits()[Mips::FeatureCnMips]; } + bool hasCOP3() const { // Only present in MIPS-I and MIPS-II return !hasMips32() && !hasMips3(); @@ -889,6 +891,16 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, } } + if (hasCnMips()) { + DEBUG(dbgs() << "Trying CnMips table (32-bit opcodes):\n"); + Result = decodeInstruction(DecoderTableCnMips32, Instr, Insn, + Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + } + if (isGP64()) { DEBUG(dbgs() << "Trying Mips64 (GPR64) table (32-bit opcodes):\n"); Result = decodeInstruction(DecoderTableMips6432, Instr, Insn, diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index e80a47b90142..a5637b16b636 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -122,7 +122,8 @@ void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O, } } -static void printExpr(const MCExpr *Expr, raw_ostream &OS) { +static void printExpr(const MCExpr *Expr, const MCAsmInfo *MAI, + raw_ostream &OS) { int Offset = 0; const MCSymbolRefExpr *SRE; @@ -132,7 +133,7 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { assert(SRE && CE && "Binary expression must be sym+const."); Offset = CE->getValue(); } else if (const MipsMCExpr *ME = dyn_cast<MipsMCExpr>(Expr)) { - ME->print(OS); + ME->print(OS, MAI); return; } else SRE = cast<MCSymbolRefExpr>(Expr); @@ -170,7 +171,7 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { case MCSymbolRefExpr::VK_Mips_PCREL_LO16: OS << "%pcrel_lo("; break; } - OS << SRE->getSymbol(); + SRE->getSymbol().print(OS, MAI); if (Offset) { if (Offset > 0) @@ -199,7 +200,7 @@ void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } assert(Op.isExpr() && "unknown operand kind in printOperand"); - printExpr(Op.getExpr(), O); + 
printExpr(Op.getExpr(), &MAI, O); } void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum, diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp index b1f7c2f22594..bf8f7d12880d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp @@ -99,6 +99,10 @@ unsigned MipsABIInfo::GetFramePtr() const { return ArePtrs64bit() ? Mips::FP_64 : Mips::FP; } +unsigned MipsABIInfo::GetBasePtr() const { + return ArePtrs64bit() ? Mips::S7_64 : Mips::S7; +} + unsigned MipsABIInfo::GetNullPtr() const { return ArePtrs64bit() ? Mips::ZERO_64 : Mips::ZERO; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h index 9a6ba9467659..d20dc9037951 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h @@ -65,6 +65,7 @@ public: unsigned GetStackPtr() const; unsigned GetFramePtr() const; + unsigned GetBasePtr() const; unsigned GetNullPtr() const; unsigned GetPtrAdduOp() const; unsigned GetPtrAddiuOp() const; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 8d9e3e31105e..982a7f54e825 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -12,10 +12,10 @@ #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include <list> @@ -46,7 +46,7 @@ struct MipsRelocationEntry { unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; - bool needsRelocateWithSymbol(const MCSymbolData &SD, + bool needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const override; virtual void sortRelocs(const MCAssembler &Asm, std::vector<ELFRelocationEntry> &Relocs) override; @@ -65,181 +65,134 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { // determine the type of the relocation - unsigned Type = (unsigned)ELF::R_MIPS_NONE; unsigned Kind = (unsigned)Fixup.getKind(); switch (Kind) { - default: - llvm_unreachable("invalid fixup kind!"); case Mips::fixup_Mips_32: case FK_Data_4: - Type = ELF::R_MIPS_32; - break; + return IsPCRel ? 
ELF::R_MIPS_PC32 : ELF::R_MIPS_32; case Mips::fixup_Mips_64: case FK_Data_8: - Type = ELF::R_MIPS_64; - break; + return ELF::R_MIPS_64; case FK_GPRel_4: if (isN64()) { + unsigned Type = (unsigned)ELF::R_MIPS_NONE; Type = setRType((unsigned)ELF::R_MIPS_GPREL32, Type); Type = setRType2((unsigned)ELF::R_MIPS_64, Type); Type = setRType3((unsigned)ELF::R_MIPS_NONE, Type); + return Type; } - else - Type = ELF::R_MIPS_GPREL32; - break; + return ELF::R_MIPS_GPREL32; case Mips::fixup_Mips_GPREL16: - Type = ELF::R_MIPS_GPREL16; - break; + return ELF::R_MIPS_GPREL16; case Mips::fixup_Mips_26: - Type = ELF::R_MIPS_26; - break; + return ELF::R_MIPS_26; case Mips::fixup_Mips_CALL16: - Type = ELF::R_MIPS_CALL16; - break; + return ELF::R_MIPS_CALL16; case Mips::fixup_Mips_GOT_Global: case Mips::fixup_Mips_GOT_Local: - Type = ELF::R_MIPS_GOT16; - break; + return ELF::R_MIPS_GOT16; case Mips::fixup_Mips_HI16: - Type = ELF::R_MIPS_HI16; - break; + return ELF::R_MIPS_HI16; case Mips::fixup_Mips_LO16: - Type = ELF::R_MIPS_LO16; - break; + return ELF::R_MIPS_LO16; case Mips::fixup_Mips_TLSGD: - Type = ELF::R_MIPS_TLS_GD; - break; + return ELF::R_MIPS_TLS_GD; case Mips::fixup_Mips_GOTTPREL: - Type = ELF::R_MIPS_TLS_GOTTPREL; - break; + return ELF::R_MIPS_TLS_GOTTPREL; case Mips::fixup_Mips_TPREL_HI: - Type = ELF::R_MIPS_TLS_TPREL_HI16; - break; + return ELF::R_MIPS_TLS_TPREL_HI16; case Mips::fixup_Mips_TPREL_LO: - Type = ELF::R_MIPS_TLS_TPREL_LO16; - break; + return ELF::R_MIPS_TLS_TPREL_LO16; case Mips::fixup_Mips_TLSLDM: - Type = ELF::R_MIPS_TLS_LDM; - break; + return ELF::R_MIPS_TLS_LDM; case Mips::fixup_Mips_DTPREL_HI: - Type = ELF::R_MIPS_TLS_DTPREL_HI16; - break; + return ELF::R_MIPS_TLS_DTPREL_HI16; case Mips::fixup_Mips_DTPREL_LO: - Type = ELF::R_MIPS_TLS_DTPREL_LO16; - break; + return ELF::R_MIPS_TLS_DTPREL_LO16; case Mips::fixup_Mips_Branch_PCRel: case Mips::fixup_Mips_PC16: - Type = ELF::R_MIPS_PC16; - break; + return ELF::R_MIPS_PC16; case Mips::fixup_Mips_GOT_PAGE: - Type = ELF::R_MIPS_GOT_PAGE; - break; + return ELF::R_MIPS_GOT_PAGE; case Mips::fixup_Mips_GOT_OFST: - Type = ELF::R_MIPS_GOT_OFST; - break; + return ELF::R_MIPS_GOT_OFST; case Mips::fixup_Mips_GOT_DISP: - Type = ELF::R_MIPS_GOT_DISP; - break; - case Mips::fixup_Mips_GPOFF_HI: + return ELF::R_MIPS_GOT_DISP; + case Mips::fixup_Mips_GPOFF_HI: { + unsigned Type = (unsigned)ELF::R_MIPS_NONE; Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type); Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type); Type = setRType3((unsigned)ELF::R_MIPS_HI16, Type); - break; - case Mips::fixup_Mips_GPOFF_LO: + return Type; + } + case Mips::fixup_Mips_GPOFF_LO: { + unsigned Type = (unsigned)ELF::R_MIPS_NONE; Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type); Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type); Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type); - break; + return Type; + } case Mips::fixup_Mips_HIGHER: - Type = ELF::R_MIPS_HIGHER; - break; + return ELF::R_MIPS_HIGHER; case Mips::fixup_Mips_HIGHEST: - Type = ELF::R_MIPS_HIGHEST; - break; + return ELF::R_MIPS_HIGHEST; case Mips::fixup_Mips_GOT_HI16: - Type = ELF::R_MIPS_GOT_HI16; - break; + return ELF::R_MIPS_GOT_HI16; case Mips::fixup_Mips_GOT_LO16: - Type = ELF::R_MIPS_GOT_LO16; - break; + return ELF::R_MIPS_GOT_LO16; case Mips::fixup_Mips_CALL_HI16: - Type = ELF::R_MIPS_CALL_HI16; - break; + return ELF::R_MIPS_CALL_HI16; case Mips::fixup_Mips_CALL_LO16: - Type = ELF::R_MIPS_CALL_LO16; - break; + return ELF::R_MIPS_CALL_LO16; case Mips::fixup_MICROMIPS_26_S1: - Type = ELF::R_MICROMIPS_26_S1; - 
break;
+    return ELF::R_MICROMIPS_26_S1;
   case Mips::fixup_MICROMIPS_HI16:
-    Type = ELF::R_MICROMIPS_HI16;
-    break;
+    return ELF::R_MICROMIPS_HI16;
   case Mips::fixup_MICROMIPS_LO16:
-    Type = ELF::R_MICROMIPS_LO16;
-    break;
+    return ELF::R_MICROMIPS_LO16;
   case Mips::fixup_MICROMIPS_GOT16:
-    Type = ELF::R_MICROMIPS_GOT16;
-    break;
+    return ELF::R_MICROMIPS_GOT16;
   case Mips::fixup_MICROMIPS_PC7_S1:
-    Type = ELF::R_MICROMIPS_PC7_S1;
-    break;
+    return ELF::R_MICROMIPS_PC7_S1;
   case Mips::fixup_MICROMIPS_PC10_S1:
-    Type = ELF::R_MICROMIPS_PC10_S1;
-    break;
+    return ELF::R_MICROMIPS_PC10_S1;
   case Mips::fixup_MICROMIPS_PC16_S1:
-    Type = ELF::R_MICROMIPS_PC16_S1;
-    break;
+    return ELF::R_MICROMIPS_PC16_S1;
   case Mips::fixup_MICROMIPS_CALL16:
-    Type = ELF::R_MICROMIPS_CALL16;
-    break;
+    return ELF::R_MICROMIPS_CALL16;
   case Mips::fixup_MICROMIPS_GOT_DISP:
-    Type = ELF::R_MICROMIPS_GOT_DISP;
-    break;
+    return ELF::R_MICROMIPS_GOT_DISP;
   case Mips::fixup_MICROMIPS_GOT_PAGE:
-    Type = ELF::R_MICROMIPS_GOT_PAGE;
-    break;
+    return ELF::R_MICROMIPS_GOT_PAGE;
   case Mips::fixup_MICROMIPS_GOT_OFST:
-    Type = ELF::R_MICROMIPS_GOT_OFST;
-    break;
+    return ELF::R_MICROMIPS_GOT_OFST;
   case Mips::fixup_MICROMIPS_TLS_GD:
-    Type = ELF::R_MICROMIPS_TLS_GD;
-    break;
+    return ELF::R_MICROMIPS_TLS_GD;
   case Mips::fixup_MICROMIPS_TLS_LDM:
-    Type = ELF::R_MICROMIPS_TLS_LDM;
-    break;
+    return ELF::R_MICROMIPS_TLS_LDM;
   case Mips::fixup_MICROMIPS_TLS_DTPREL_HI16:
-    Type = ELF::R_MICROMIPS_TLS_DTPREL_HI16;
-    break;
+    return ELF::R_MICROMIPS_TLS_DTPREL_HI16;
   case Mips::fixup_MICROMIPS_TLS_DTPREL_LO16:
-    Type = ELF::R_MICROMIPS_TLS_DTPREL_LO16;
-    break;
+    return ELF::R_MICROMIPS_TLS_DTPREL_LO16;
   case Mips::fixup_MICROMIPS_TLS_TPREL_HI16:
-    Type = ELF::R_MICROMIPS_TLS_TPREL_HI16;
-    break;
+    return ELF::R_MICROMIPS_TLS_TPREL_HI16;
   case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
-    Type = ELF::R_MICROMIPS_TLS_TPREL_LO16;
-    break;
+    return ELF::R_MICROMIPS_TLS_TPREL_LO16;
   case Mips::fixup_MIPS_PC19_S2:
-    Type = ELF::R_MIPS_PC19_S2;
-    break;
+    return ELF::R_MIPS_PC19_S2;
   case Mips::fixup_MIPS_PC18_S3:
-    Type = ELF::R_MIPS_PC18_S3;
-    break;
+    return ELF::R_MIPS_PC18_S3;
   case Mips::fixup_MIPS_PC21_S2:
-    Type = ELF::R_MIPS_PC21_S2;
-    break;
+    return ELF::R_MIPS_PC21_S2;
   case Mips::fixup_MIPS_PC26_S2:
-    Type = ELF::R_MIPS_PC26_S2;
-    break;
+    return ELF::R_MIPS_PC26_S2;
   case Mips::fixup_MIPS_PCHI16:
-    Type = ELF::R_MIPS_PCHI16;
-    break;
+    return ELF::R_MIPS_PCHI16;
   case Mips::fixup_MIPS_PCLO16:
-    Type = ELF::R_MIPS_PCLO16;
-    break;
+    return ELF::R_MIPS_PCLO16;
   }
-  return Type;
+  llvm_unreachable("invalid fixup kind!");
 }
 
 // Sort entries by SortOffset in descending order.
@@ -271,9 +224,7 @@ static unsigned getMatchingLoType(const MCAssembler &Asm,
   if (Type == ELF::R_MIPS16_HI16)
     return ELF::R_MIPS16_LO16;
 
-  const MCSymbolData &SD = Asm.getSymbolData(*Reloc.Symbol);
-
-  if (MCELF::GetBinding(SD) != ELF::STB_LOCAL)
+  if (Reloc.Symbol->getBinding() != ELF::STB_LOCAL)
     return ELF::R_MIPS_NONE;
 
   if (Type == ELF::R_MIPS_GOT16)
@@ -405,9 +356,8 @@ void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
     Relocs[I] = MipsRelocs[I].R;
 }
 
-bool
-MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
-                                             unsigned Type) const {
+bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+                                                  unsigned Type) const {
   // FIXME: This is extremely conservative. This really needs to use a
   // whitelist with a clear explanation for why each relocation needs to
   // point to the symbol, not to the section.
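For reference, the N64 cases above (FK_GPRel_4, GPOFF_HI, GPOFF_LO) compose three relocation codes into a single entry through setRType/setRType2/setRType3. A standalone sketch of that packing, on the assumption (suggested by the helper names, not stated in this patch) that each code occupies the next 8-bit field of the r_type word:

#include <cstdint>

// Pack three ELF64 MIPS relocation codes into one r_type word, low byte first
// (an assumption about setRType/setRType2/setRType3, not code from this patch).
static inline uint32_t packN64Reloc(uint8_t R1, uint8_t R2, uint8_t R3) {
  return uint32_t(R1) | (uint32_t(R2) << 8) | (uint32_t(R3) << 16);
}

// e.g. the fixup_Mips_GPOFF_HI case corresponds to
// packN64Reloc(ELF::R_MIPS_GPREL16, ELF::R_MIPS_SUB, ELF::R_MIPS_HI16).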
@@ -434,7 +384,7 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
     return true;
 
   case ELF::R_MIPS_32:
-    if (MCELF::getOther(SD) & (ELF::STO_MIPS_MICROMIPS >> 2))
+    if (cast<MCSymbolELF>(Sym).getOther() & ELF::STO_MIPS_MICROMIPS)
       return true;
     // fallthrough
   case ELF::R_MIPS_26:
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index d2b51831245a..b45d9cf621d7 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -9,8 +9,8 @@
 
 #include "MipsELFStreamer.h"
 #include "MipsTargetStreamer.h"
-#include "llvm/MC/MCELF.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbolELF.h"
 #include "llvm/Support/ELF.h"
 
 using namespace llvm;
@@ -41,12 +41,10 @@ void MipsELFStreamer::createPendingLabelRelocs() {
 
   // FIXME: Also mark labels when in MIPS16 mode.
   if (ELFTargetStreamer->isMicroMipsEnabled()) {
-    for (auto Label : Labels) {
-      MCSymbolData &Data = getOrCreateSymbolData(Label);
-      // The "other" values are stored in the last 6 bits of the second byte.
-      // The traditional defines for STO values assume the full byte and thus
-      // the shift to pack it.
-      MCELF::setOther(Data, ELF::STO_MIPS_MICROMIPS >> 2);
+    for (auto *L : Labels) {
+      auto *Label = cast<MCSymbolELF>(L);
+      getAssembler().registerSymbol(*Label);
+      Label->setOther(ELF::STO_MIPS_MICROMIPS);
     }
   }
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index e2bd5a815ab1..4d554583dc78 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -18,8 +18,7 @@ using namespace llvm;
 
 void MipsMCAsmInfo::anchor() { }
 
-MipsMCAsmInfo::MipsMCAsmInfo(StringRef TT) {
-  Triple TheTriple(TT);
+MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
   if ((TheTriple.getArch() == Triple::mips) ||
       (TheTriple.getArch() == Triple::mips64))
     IsLittleEndian = false;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 59ff1c41ed6e..5d23fcbd7a44 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -17,12 +17,12 @@
 #include "llvm/MC/MCAsmInfoELF.h"
 
 namespace llvm {
-  class StringRef;
+  class Triple;
 
   class MipsMCAsmInfo : public MCAsmInfoELF {
     void anchor() override;
 
   public:
-    explicit MipsMCAsmInfo(StringRef TT);
+    explicit MipsMCAsmInfo(const Triple &TheTriple);
   };
 
 } // namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index a0d9e1540515..93925bf8ca03 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -467,7 +467,7 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
                const MCSubtargetInfo &STI) const {
   int64_t Res;
 
-  if (Expr->EvaluateAsAbsolute(Res))
+  if (Expr->evaluateAsAbsolute(Res))
     return Res;
 
   MCExpr::ExprKind Kind = Expr->getKind();
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 74490f334b37..c85fc4816b08 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -41,7 +41,7 @@ bool MipsMCExpr::isSupportedBinaryExpr(MCSymbolRefExpr::VariantKind VK,
 }
 
 const MipsMCExpr*
-MipsMCExpr::Create(MCSymbolRefExpr::VariantKind VK, const MCExpr *Expr,
+MipsMCExpr::create(MCSymbolRefExpr::VariantKind VK, const MCExpr
*Expr, MCContext &Ctx) { VariantKind Kind; switch (VK) { @@ -64,7 +64,7 @@ MipsMCExpr::Create(MCSymbolRefExpr::VariantKind VK, const MCExpr *Expr, return new (Ctx) MipsMCExpr(Kind, Expr); } -void MipsMCExpr::PrintImpl(raw_ostream &OS) const { +void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { switch (Kind) { default: llvm_unreachable("Invalid kind!"); case VK_Mips_LO: OS << "%lo"; break; @@ -74,15 +74,15 @@ void MipsMCExpr::PrintImpl(raw_ostream &OS) const { } OS << '('; - Expr->print(OS); + Expr->print(OS, MAI); OS << ')'; } bool -MipsMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, +MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const { - return getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup); + return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup); } void MipsMCExpr::visitUsedExpr(MCStreamer &Streamer) const { diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h index ee11461ef174..fd2ed17ee785 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h @@ -37,7 +37,7 @@ public: static bool isSupportedBinaryExpr(MCSymbolRefExpr::VariantKind VK, const MCBinaryExpr *BE); - static const MipsMCExpr *Create(MCSymbolRefExpr::VariantKind VK, + static const MipsMCExpr *create(MCSymbolRefExpr::VariantKind VK, const MCExpr *Expr, MCContext &Ctx); /// getOpcode - Get the kind of this expression. @@ -46,13 +46,13 @@ public: /// getSubExpr - Get the child of this expression. const MCExpr *getSubExpr() const { return Expr; } - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override; void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *FindAssociatedSection() const override { - return getSubExpr()->FindAssociatedSection(); + MCSection *findAssociatedSection() const override { + return getSubExpr()->findAssociatedSection(); } // There are no TLS MipsMCExprs at the moment. 
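The bulk of the MipsMCExpr churn is the MC-layer rename from capitalized to lowercase methods (Create to create, EvaluateAsAbsolute to evaluateAsAbsolute, PrintImpl to printImpl, FindAssociatedSection to findAssociatedSection). Caller code changes shape as in the sketch below; it only rearranges create/createAdd/evaluateAsAbsolute calls that appear verbatim in this patch, with the wrapper name makeSymPlusOffset being a hypothetical addition:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"

// Build Sym + Offset with the renamed factory methods.
const llvm::MCExpr *makeSymPlusOffset(llvm::MCSymbol *Sym, int64_t Offset,
                                      llvm::MCContext &Ctx) {
  const llvm::MCExpr *E = llvm::MCSymbolRefExpr::create(Sym, Ctx);
  if (Offset)
    E = llvm::MCBinaryExpr::createAdd(
        E, llvm::MCConstantExpr::create(Offset, Ctx), Ctx);
  return E;
}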
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 2e3179ac28d9..54d88632abdb 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -75,7 +75,8 @@ static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { +static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT) { MCAsmInfo *MAI = new MipsMCAsmInfo(TT); unsigned SP = MRI.getDwarfRegNum(Mips::SP, true); diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 8e6f047450e3..a051f4c123fc 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -17,10 +17,9 @@ #include "MipsTargetObjectFile.h" #include "MipsTargetStreamer.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -81,6 +80,12 @@ void MipsTargetStreamer::emitDirectiveSetMips64R5() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips64R6() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetPop() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetPush() { forbidModuleDirective(); } +void MipsTargetStreamer::emitDirectiveSetSoftFloat() { + forbidModuleDirective(); +} +void MipsTargetStreamer::emitDirectiveSetHardFloat() { + forbidModuleDirective(); +} void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {} @@ -308,6 +313,16 @@ void MipsTargetAsmStreamer::emitDirectiveSetPush() { MipsTargetStreamer::emitDirectiveSetPush(); } +void MipsTargetAsmStreamer::emitDirectiveSetSoftFloat() { + OS << "\t.set\tsoftfloat\n"; + MipsTargetStreamer::emitDirectiveSetSoftFloat(); +} + +void MipsTargetAsmStreamer::emitDirectiveSetHardFloat() { + OS << "\t.set\thardfloat\n"; + MipsTargetStreamer::emitDirectiveSetHardFloat(); +} + // Print a 32 bit hex number with all numbers. 
 static void printHex32(unsigned Value, raw_ostream &OS) {
   OS << "0x";
@@ -358,7 +373,6 @@ void MipsTargetAsmStreamer::emitDirectiveModuleFP(
     MipsABIFlagsSection::FpABIKind Value, bool Is32BitABI) {
   MipsTargetStreamer::emitDirectiveModuleFP(Value, Is32BitABI);
 
-  StringRef ModuleValue;
   OS << "\t.module\tfp=";
   OS << ABIFlagsSection.getFpABIString(Value) << "\n";
 }
@@ -367,7 +381,6 @@ void MipsTargetAsmStreamer::emitDirectiveSetFp(
     MipsABIFlagsSection::FpABIKind Value) {
   MipsTargetStreamer::emitDirectiveSetFp(Value);
 
-  StringRef ModuleValue;
   OS << "\t.set\tfp=";
   OS << ABIFlagsSection.getFpABIString(Value) << "\n";
 }
@@ -440,18 +453,16 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
   MCA.setELFHeaderEFlags(EFlags);
 }
 
-void MipsTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
+void MipsTargetELFStreamer::emitLabel(MCSymbol *S) {
+  auto *Symbol = cast<MCSymbolELF>(S);
   if (!isMicroMipsEnabled())
     return;
-  MCSymbolData &Data = getStreamer().getOrCreateSymbolData(Symbol);
-  uint8_t Type = MCELF::GetType(Data);
+  getStreamer().getAssembler().registerSymbol(*Symbol);
+  uint8_t Type = Symbol->getType();
   if (Type != ELF::STT_FUNC)
     return;
-  // The "other" values are stored in the last 6 bits of the second byte
-  // The traditional defines for STO values assume the full byte and thus
-  // the shift to pack it.
-  MCELF::setOther(Data, ELF::STO_MIPS_MICROMIPS >> 2);
+  Symbol->setOther(ELF::STO_MIPS_MICROMIPS);
 }
 
 void MipsTargetELFStreamer::finish() {
@@ -505,23 +516,18 @@ void MipsTargetELFStreamer::finish() {
   emitMipsAbiFlags();
 }
 
-void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol,
-                                           const MCExpr *Value) {
+void MipsTargetELFStreamer::emitAssignment(MCSymbol *S, const MCExpr *Value) {
+  auto *Symbol = cast<MCSymbolELF>(S);
   // If the rhs is a microMIPS symbol, mark Symbol as microMIPS too.
   if (Value->getKind() != MCExpr::SymbolRef)
     return;
-  const MCSymbol &RhsSym =
-      static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
-  MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym);
+  const auto &RhsSym = cast<MCSymbolELF>(
+      static_cast<const MCSymbolRefExpr *>(Value)->getSymbol());
 
-  if (!(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2)))
+  if (!(RhsSym.getOther() & ELF::STO_MIPS_MICROMIPS))
     return;
 
-  MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol);
-  // The "other" values are stored in the last 6 bits of the second byte.
-  // The traditional defines for STO values assume the full byte and thus
-  // the shift to pack it.
- MCELF::setOther(SymbolData, ELF::STO_MIPS_MICROMIPS >> 2); + Symbol->setOther(ELF::STO_MIPS_MICROMIPS); } MCELFStreamer &MipsTargetELFStreamer::getStreamer() { @@ -568,7 +574,7 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) { ELF::SHF_ALLOC | ELF::SHT_REL); const MCSymbolRefExpr *ExprRef = - MCSymbolRefExpr::Create(Name, MCSymbolRefExpr::VK_None, Context); + MCSymbolRefExpr::create(Name, MCSymbolRefExpr::VK_None, Context); MCA.registerSection(*Sec); Sec->setAlignment(4); @@ -693,12 +699,12 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) { StringRef SymName("_gp_disp"); MCAssembler &MCA = getStreamer().getAssembler(); MCSymbol *GP_Disp = MCA.getContext().getOrCreateSymbol(SymName); - MCA.getOrCreateSymbolData(*GP_Disp); + MCA.registerSymbol(*GP_Disp); MCInst TmpInst; TmpInst.setOpcode(Mips::LUi); TmpInst.addOperand(MCOperand::createReg(Mips::GP)); - const MCSymbolRefExpr *HiSym = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *HiSym = MCSymbolRefExpr::create( "_gp_disp", MCSymbolRefExpr::VK_Mips_ABS_HI, MCA.getContext()); TmpInst.addOperand(MCOperand::createExpr(HiSym)); getStreamer().EmitInstruction(TmpInst, STI); @@ -708,7 +714,7 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) { TmpInst.setOpcode(Mips::ADDiu); TmpInst.addOperand(MCOperand::createReg(Mips::GP)); TmpInst.addOperand(MCOperand::createReg(Mips::GP)); - const MCSymbolRefExpr *LoSym = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *LoSym = MCSymbolRefExpr::create( "_gp_disp", MCSymbolRefExpr::VK_Mips_ABS_LO, MCA.getContext()); TmpInst.addOperand(MCOperand::createExpr(LoSym)); getStreamer().EmitInstruction(TmpInst, STI); @@ -752,9 +758,9 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, getStreamer().EmitInstruction(Inst, STI); Inst.clear(); - const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::create( &Sym, MCSymbolRefExpr::VK_Mips_GPOFF_HI, MCA.getContext()); - const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create( + const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create( &Sym, MCSymbolRefExpr::VK_Mips_GPOFF_LO, MCA.getContext()); // lui $gp, %hi(%neg(%gp_rel(funcSym))) diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 272933f7285e..8a27874a37ce 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -305,8 +305,9 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in { def LONG_BRANCH_DADDiu : PseudoSE<(outs GPR64Opnd:$dst), (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>; -// Cavium Octeon cmMIPS instructions -let EncodingPredicates = []<Predicate>, // FIXME: The lack of HasStdEnc is probably a bug +// Cavium Octeon cnMIPS instructions +let DecoderNamespace = "CnMips", + EncodingPredicates = []<Predicate>, // FIXME: The lack of HasStdEnc is probably a bug AdditionalPredicates = [HasCnMips] in { class Count1s<string opstr, RegisterOperand RO>: @@ -353,6 +354,10 @@ class CBranchBitNum<string opstr, DAGOperand opnd, PatFrag cond_op, let Defs = [AT]; } +class MFC2OP<string asmstr, RegisterOperand RO> : + InstSE<(outs RO:$rt, uimm16:$imm16), (ins), + !strconcat(asmstr, "\t$rt, $imm16"), [], NoItinerary, FrmFR>; + // Unsigned Byte Add let Pattern = [(set GPR64Opnd:$rd, (and (add GPR64Opnd:$rs, GPR64Opnd:$rt), 255))] in @@ -415,6 +420,9 @@ let Defs = [MPL1, MPL2, P0, P1, P2] in def VMULU : ArithLogicR<"vmulu", GPR64Opnd, 0, II_DMUL>, ADD_FM<0x1c, 0x0f>; +// Move between CPU and coprocessor registers +def DMFC2_OCTEON : 
MFC2OP<"dmfc2", GPR64Opnd>, MFC2OP_FM<0x12, 1>; +def DMTC2_OCTEON : MFC2OP<"dmtc2", GPR64Opnd>, MFC2OP_FM<0x12, 5>; } } diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index a3995b8ceb99..f84666b6229e 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -41,7 +41,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -594,11 +594,11 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: - O << *getSymbol(MO.getGlobal()); + getSymbol(MO.getGlobal())->print(O, MAI); break; case MachineOperand::MO_BlockAddress: { @@ -778,7 +778,7 @@ void MipsAsmPrinter::EmitJal(const MCSubtargetInfo &STI, MCSymbol *Symbol) { MCInst I; I.setOpcode(Mips::JAL); I.addOperand( - MCOperand::createExpr(MCSymbolRefExpr::Create(Symbol, OutContext))); + MCOperand::createExpr(MCSymbolRefExpr::create(Symbol, OutContext))); OutStreamer->EmitInstruction(I, STI); } @@ -983,7 +983,8 @@ void MipsAsmPrinter::EmitFPCallStub( // __call_stub_fp_xxxx: // std::string x = "__call_stub_fp_" + std::string(Symbol); - MCSymbol *Stub = OutContext.getOrCreateSymbol(StringRef(x)); + MCSymbolELF *Stub = + cast<MCSymbolELF>(OutContext.getOrCreateSymbol(StringRef(x))); TS.emitDirectiveEnt(*Stub); MCSymbol *MType = OutContext.getOrCreateSymbol("__call_stub_fp_" + Twine(Symbol)); @@ -1028,10 +1029,10 @@ void MipsAsmPrinter::EmitFPCallStub( MCSymbol *Tmp = OutContext.createTempSymbol(); OutStreamer->EmitLabel(Tmp); - const MCSymbolRefExpr *E = MCSymbolRefExpr::Create(Stub, OutContext); - const MCSymbolRefExpr *T = MCSymbolRefExpr::Create(Tmp, OutContext); - const MCExpr *T_min_E = MCBinaryExpr::CreateSub(T, E, OutContext); - OutStreamer->EmitELFSize(Stub, T_min_E); + const MCSymbolRefExpr *E = MCSymbolRefExpr::create(Stub, OutContext); + const MCSymbolRefExpr *T = MCSymbolRefExpr::create(Tmp, OutContext); + const MCExpr *T_min_E = MCBinaryExpr::createSub(T, E, OutContext); + OutStreamer->emitELFSize(Stub, T_min_E); TS.emitDirectiveEnd(x); OutStreamer->PopSection(); } diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 4faee10744b3..3d020abe2704 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -82,6 +82,7 @@ class MipsFastISel final : public FastISel { LLVMContext *Context; bool fastLowerCall(CallLoweringInfo &CLI) override; + bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; bool TargetSupported; bool UnsupportedFPMode; // To allow fast-isel to proceed and just not handle @@ -94,6 +95,7 @@ private: bool selectLoad(const Instruction *I); bool selectStore(const Instruction *I); bool selectBranch(const Instruction *I); + bool selectSelect(const Instruction *I); bool selectCmp(const Instruction *I); bool selectFPExt(const Instruction *I); bool selectFPTrunc(const Instruction *I); @@ -102,6 +104,7 @@ private: bool selectTrunc(const Instruction *I); bool selectIntExt(const Instruction *I); bool selectShift(const Instruction *I); + bool selectDivRem(const Instruction *I, unsigned ISDOpcode); // Utility helper routines. 
bool isTypeLegal(Type *Ty, MVT &VT); @@ -140,6 +143,7 @@ private: unsigned materializeGV(const GlobalValue *GV, MVT VT); unsigned materializeInt(const Constant *C, MVT VT); unsigned materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); + unsigned materializeExternalCallSym(const char *SymName); MachineInstrBuilder emitInst(unsigned Opc) { return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); @@ -156,6 +160,12 @@ private: unsigned MemReg, int64_t MemOffset) { return emitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset); } + + unsigned fastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); + // For some reason, this default is not generated by TableGen, // so we explicitly generate it here. // @@ -359,6 +369,15 @@ unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) { return DestReg; } +unsigned MipsFastISel::materializeExternalCallSym(const char *SymName) { + const TargetRegisterClass *RC = &Mips::GPR32RegClass; + unsigned DestReg = createResultReg(RC); + emitInst(Mips::LW, DestReg) + .addReg(MFI->getGlobalBaseReg()) + .addExternalSymbol(SymName, MipsII::MO_GOT); + return DestReg; +} + // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) { @@ -463,15 +482,51 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) { } bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) { - const GlobalValue *GV = dyn_cast<GlobalValue>(V); - if (GV && isa<Function>(GV) && cast<Function>(GV)->isIntrinsic()) - return false; - if (!GV) - return false; + const User *U = nullptr; + unsigned Opcode = Instruction::UserOp1; + + if (const auto *I = dyn_cast<Instruction>(V)) { + // Check if the value is defined in the same basic block. This information + // is crucial to know whether or not folding an operand is valid. + if (I->getParent() == FuncInfo.MBB->getBasicBlock()) { + Opcode = I->getOpcode(); + U = I; + } + } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { + Opcode = C->getOpcode(); + U = C; + } + + switch (Opcode) { + default: + break; + case Instruction::BitCast: + // Look past bitcasts if the operand is in the same BB. + return computeCallAddress(U->getOperand(0), Addr); + break; + case Instruction::IntToPtr: + // Look past no-op inttoptrs if the operand is in the same BB. + if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + return computeCallAddress(U->getOperand(0), Addr); + break; + case Instruction::PtrToInt: + // Look past no-op ptrtoints if the operand is in the same BB. + if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + return computeCallAddress(U->getOperand(0), Addr); + break; + } + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { Addr.setGlobalValue(GV); return true; } + + // If all else fails, try to materialize the value in a register. 
+ if (!Addr.getGlobalValue()) { + Addr.setReg(getRegForValue(V)); + return Addr.getReg() != 0; + } + return false; } @@ -893,6 +948,50 @@ bool MipsFastISel::selectFPExt(const Instruction *I) { return true; } +bool MipsFastISel::selectSelect(const Instruction *I) { + assert(isa<SelectInst>(I) && "Expected a select instruction."); + + MVT VT; + if (!isTypeSupported(I->getType(), VT)) + return false; + + unsigned CondMovOpc; + const TargetRegisterClass *RC; + + if (VT.isInteger() && !VT.isVector() && VT.getSizeInBits() <= 32) { + CondMovOpc = Mips::MOVN_I_I; + RC = &Mips::GPR32RegClass; + } else if (VT == MVT::f32) { + CondMovOpc = Mips::MOVN_I_S; + RC = &Mips::FGR32RegClass; + } else if (VT == MVT::f64) { + CondMovOpc = Mips::MOVN_I_D32; + RC = &Mips::AFGR64RegClass; + } else + return false; + + const SelectInst *SI = cast<SelectInst>(I); + const Value *Cond = SI->getCondition(); + unsigned Src1Reg = getRegForValue(SI->getTrueValue()); + unsigned Src2Reg = getRegForValue(SI->getFalseValue()); + unsigned CondReg = getRegForValue(Cond); + + if (!Src1Reg || !Src2Reg || !CondReg) + return false; + + unsigned ResultReg = createResultReg(RC); + unsigned TempReg = createResultReg(RC); + + if (!ResultReg || !TempReg) + return false; + + emitInst(TargetOpcode::COPY, TempReg).addReg(Src2Reg); + emitInst(CondMovOpc, ResultReg) + .addReg(Src1Reg).addReg(CondReg).addReg(TempReg); + updateValueMap(I, ResultReg); + return true; +} + // Attempt to fast-select a floating-point truncate instruction. bool MipsFastISel::selectFPTrunc(const Instruction *I) { if (UnsupportedFPMode) @@ -1135,7 +1234,7 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { bool IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; - // const char *SymName = CLI.SymName; + const char *SymName = CLI.SymName; // Allow SelectionDAG isel to handle tail calls. if (IsTailCall) @@ -1182,8 +1281,15 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { if (!processCallArgs(CLI, OutVTs, NumBytes)) return false; + if (!Addr.getGlobalValue()) + return false; + // Issue the call. 
- unsigned DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32); + unsigned DestAddress; + if (SymName) + DestAddress = materializeExternalCallSym(SymName); + else + DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32); emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::JALR), @@ -1203,6 +1309,98 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { return finishCall(CLI, RetVT, NumBytes); } +bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: + return false; + case Intrinsic::bswap: { + Type *RetTy = II->getCalledFunction()->getReturnType(); + + MVT VT; + if (!isTypeSupported(RetTy, VT)) + return false; + + unsigned SrcReg = getRegForValue(II->getOperand(0)); + if (SrcReg == 0) + return false; + unsigned DestReg = createResultReg(&Mips::GPR32RegClass); + if (DestReg == 0) + return false; + if (VT == MVT::i16) { + if (Subtarget->hasMips32r2()) { + emitInst(Mips::WSBH, DestReg).addReg(SrcReg); + updateValueMap(II, DestReg); + return true; + } else { + unsigned TempReg[3]; + for (int i = 0; i < 3; i++) { + TempReg[i] = createResultReg(&Mips::GPR32RegClass); + if (TempReg[i] == 0) + return false; + } + emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8); + emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8); + emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]); + emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF); + updateValueMap(II, DestReg); + return true; + } + } else if (VT == MVT::i32) { + if (Subtarget->hasMips32r2()) { + unsigned TempReg = createResultReg(&Mips::GPR32RegClass); + emitInst(Mips::WSBH, TempReg).addReg(SrcReg); + emitInst(Mips::ROTR, DestReg).addReg(TempReg).addImm(16); + updateValueMap(II, DestReg); + return true; + } else { + unsigned TempReg[8]; + for (int i = 0; i < 8; i++) { + TempReg[i] = createResultReg(&Mips::GPR32RegClass); + if (TempReg[i] == 0) + return false; + } + + emitInst(Mips::SRL, TempReg[0]).addReg(SrcReg).addImm(8); + emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(24); + emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[0]).addImm(0xFF00); + emitInst(Mips::OR, TempReg[3]).addReg(TempReg[1]).addReg(TempReg[2]); + + emitInst(Mips::ANDi, TempReg[4]).addReg(SrcReg).addImm(0xFF00); + emitInst(Mips::SLL, TempReg[5]).addReg(TempReg[4]).addImm(8); + + emitInst(Mips::SLL, TempReg[6]).addReg(SrcReg).addImm(24); + emitInst(Mips::OR, TempReg[7]).addReg(TempReg[3]).addReg(TempReg[5]); + emitInst(Mips::OR, DestReg).addReg(TempReg[6]).addReg(TempReg[7]); + updateValueMap(II, DestReg); + return true; + } + } + return false; + } + case Intrinsic::memcpy: + case Intrinsic::memmove: { + const auto *MTI = cast<MemTransferInst>(II); + // Don't handle volatile. + if (MTI->isVolatile()) + return false; + if (!MTI->getLength()->getType()->isIntegerTy(32)) + return false; + const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; + return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); + } + case Intrinsic::memset: { + const MemSetInst *MSI = cast<MemSetInst>(II); + // Don't handle volatile. 
+ if (MSI->isVolatile()) + return false; + if (!MSI->getLength()->getType()->isIntegerTy(32)) + return false; + return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); + } + } + return false; +} + bool MipsFastISel::selectRet(const Instruction *I) { const Function &F = *I->getParent()->getParent(); const ReturnInst *Ret = cast<ReturnInst>(I); @@ -1420,6 +1618,50 @@ unsigned MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, return Success ? DestReg : 0; } +bool MipsFastISel::selectDivRem(const Instruction *I, unsigned ISDOpcode) { + EVT DestEVT = TLI.getValueType(I->getType(), true); + if (!DestEVT.isSimple()) + return false; + + MVT DestVT = DestEVT.getSimpleVT(); + if (DestVT != MVT::i32) + return false; + + unsigned DivOpc; + switch (ISDOpcode) { + default: + return false; + case ISD::SDIV: + case ISD::SREM: + DivOpc = Mips::SDIV; + break; + case ISD::UDIV: + case ISD::UREM: + DivOpc = Mips::UDIV; + break; + } + + unsigned Src0Reg = getRegForValue(I->getOperand(0)); + unsigned Src1Reg = getRegForValue(I->getOperand(1)); + if (!Src0Reg || !Src1Reg) + return false; + + emitInst(DivOpc).addReg(Src0Reg).addReg(Src1Reg); + emitInst(Mips::TEQ).addReg(Src1Reg).addReg(Mips::ZERO).addImm(7); + + unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); + if (!ResultReg) + return false; + + unsigned MFOpc = (ISDOpcode == ISD::SREM || ISDOpcode == ISD::UREM) + ? Mips::MFHI + : Mips::MFLO; + emitInst(MFOpc, ResultReg); + + updateValueMap(I, ResultReg); + return true; +} + bool MipsFastISel::selectShift(const Instruction *I) { MVT RetVT; @@ -1505,6 +1747,22 @@ bool MipsFastISel::fastSelectInstruction(const Instruction *I) { return selectLoad(I); case Instruction::Store: return selectStore(I); + case Instruction::SDiv: + if (!selectBinaryOp(I, ISD::SDIV)) + return selectDivRem(I, ISD::SDIV); + return true; + case Instruction::UDiv: + if (!selectBinaryOp(I, ISD::UDIV)) + return selectDivRem(I, ISD::UDIV); + return true; + case Instruction::SRem: + if (!selectBinaryOp(I, ISD::SREM)) + return selectDivRem(I, ISD::SREM); + return true; + case Instruction::URem: + if (!selectBinaryOp(I, ISD::UREM)) + return selectDivRem(I, ISD::UREM); + return true; case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: @@ -1533,6 +1791,8 @@ bool MipsFastISel::fastSelectInstruction(const Instruction *I) { case Instruction::ICmp: case Instruction::FCmp: return selectCmp(I); + case Instruction::Select: + return selectSelect(I); } return false; } @@ -1563,6 +1823,33 @@ void MipsFastISel::simplifyAddress(Address &Addr) { } } +unsigned MipsFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { + // We treat the MUL instruction in a special way because it clobbers + // the HI0 & LO0 registers. The TableGen definition of this instruction can + // mark these registers only as implicitly defined. As a result, the + // register allocator runs out of registers when this instruction is + // followed by another instruction that defines the same registers too. + // We can fix this by explicitly marking those registers as dead. 
+ if (MachineInstOpcode == Mips::MUL) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Mips::HI0, RegState::ImplicitDefine | RegState::Dead) + .addReg(Mips::LO0, RegState::ImplicitDefine | RegState::Dead); + return ResultReg; + } + + return FastISel::fastEmitInst_rr(MachineInstOpcode, RC, Op0, Op0IsKill, Op1, + Op1IsKill); +} + namespace llvm { FastISel *Mips::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 826fbaf4d00a..a74c8abd2e2d 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -90,12 +90,23 @@ const MipsFrameLowering *MipsFrameLowering::create(const MipsSubtarget &ST) { } // hasFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. +// pointer register. This is true if the function has variable sized allocas, +// if it needs dynamic stack realignment, if frame pointer elimination is +// disabled, or if the frame address is taken. bool MipsFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + return MF.getTarget().Options.DisableFramePointerElim(MF) || - MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); + MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || + TRI->needsStackRealignment(MF); +} + +bool MipsFrameLowering::hasBP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + return MFI->hasVarSizedObjects() && TRI->needsStackRealignment(MF); } uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index 0b5183081e09..5eabd58e8686 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -32,6 +32,8 @@ public: bool hasFP(const MachineFunction &MF) const override; + bool hasBP(const MachineFunction &MF) const; + bool isFPCloseToIncomingSP() const override { return false; } void diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 6c7f0895b426..67ddcc4dacb9 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3547,7 +3547,8 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op, } bool MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // No global is ever allowed as a base. 
if (AM.BaseGV) return false; diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 6ea14b53a57f..bc9a1ce64097 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -514,7 +514,8 @@ namespace llvm { return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 02ecf32d3e47..5f4fcc354616 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -226,6 +226,18 @@ class MFC3OP_FM<bits<6> op, bits<5> mfmt> let Inst{2-0} = sel; } +class MFC2OP_FM<bits<6> op, bits<5> mfmt> : StdArch { + bits<5> rt; + bits<16> imm16; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = mfmt; + let Inst{20-16} = rt; + let Inst{15-0} = imm16; +} + class ADD_FM<bits<6> op, bits<6> funct> : StdArch { bits<5> rd; bits<5> rs; diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 9e611804376b..6b2a44d7a893 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -101,7 +101,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, llvm_unreachable("<unknown operand type>"); } - const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx); + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, Kind, *Ctx); if (!Offset) return MCOperand::createExpr(MCSym); @@ -109,8 +109,8 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, // Assume offset is never negative. 
assert(Offset > 0); - const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx); - const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); + const MCConstantExpr *OffsetExpr = MCConstantExpr::create(Offset, *Ctx); + const MCBinaryExpr *Add = MCBinaryExpr::createAdd(MCSym, OffsetExpr, *Ctx); return MCOperand::createExpr(Add); } @@ -155,11 +155,11 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO, MCOperand MipsMCInstLower::createSub(MachineBasicBlock *BB1, MachineBasicBlock *BB2, MCSymbolRefExpr::VariantKind Kind) const { - const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::Create(BB1->getSymbol(), *Ctx); - const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::Create(BB2->getSymbol(), *Ctx); - const MCBinaryExpr *Sub = MCBinaryExpr::CreateSub(Sym1, Sym2, *Ctx); + const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::create(BB1->getSymbol(), *Ctx); + const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::create(BB2->getSymbol(), *Ctx); + const MCBinaryExpr *Sub = MCBinaryExpr::createSub(Sym1, Sym2, *Ctx); - return MCOperand::createExpr(MipsMCExpr::Create(Kind, Sub, *Ctx)); + return MCOperand::createExpr(MipsMCExpr::create(Kind, Sub, *Ctx)); } void MipsMCInstLower:: diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index f72fb4d622ec..f6647e6a8468 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/Constants.h" @@ -178,6 +179,15 @@ getReservedRegs(const MachineFunction &MF) const { else { Reserved.set(Mips::FP); Reserved.set(Mips::FP_64); + + // Reserve the base register if we need to both realign the stack and + // allocate variable-sized objects at runtime. This should test the + // same conditions as MipsFrameLowering::hasBP(). + if (needsStackRealignment(MF) && + MF.getFrameInfo()->hasVarSizedObjects()) { + Reserved.set(Mips::S7); + Reserved.set(Mips::S7_64); + } } } @@ -271,6 +281,67 @@ getFrameRegister(const MachineFunction &MF) const { else return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) : (IsN64 ? Mips::SP_64 : Mips::SP); +} +bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const { + const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>(); + unsigned FP = Subtarget.isGP32bit() ? Mips::FP : Mips::FP_64; + unsigned BP = Subtarget.isGP32bit() ? Mips::S7 : Mips::S7_64; + + // Support dynamic stack realignment only for targets with standard encoding. + if (!Subtarget.hasStandardEncoding()) + return false; + + // We can't perform dynamic stack realignment if we can't reserve the + // frame pointer register. + if (!MF.getRegInfo().canReserveReg(FP)) + return false; + + // We can realign the stack if we know the maximum call frame size and we + // don't have variable sized objects. + if (Subtarget.getFrameLowering()->hasReservedCallFrame(MF)) + return true; + + // We have to reserve the base pointer register in the presence of variable + // sized objects. 
+ return MF.getRegInfo().canReserveReg(BP); } +bool MipsRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { + const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + bool CanRealign = canRealignStack(MF); + + // Avoid realigning functions that explicitly do not want to be realigned. + // Normally, we should report an error when a function should be dynamically + // realigned but also has the attribute no-realign-stack. Unfortunately, + // with this attribute, MachineFrameInfo clamps each new object's alignment + // to that of the stack's alignment as specified by the ABI. As a result, + // the information of whether we have objects with larger alignment + // requirement than the stack's alignment is already lost at this point. + if (MF.getFunction()->hasFnAttribute("no-realign-stack")) + return false; + + const Function *F = MF.getFunction(); + if (F->hasFnAttribute(Attribute::StackAlignment)) { +#ifdef DEBUG + if (!CanRealign) + DEBUG(dbgs() << "It's not possible to realign the stack of the function: " + << F->getName() << "\n"); +#endif + return CanRealign; + } + + unsigned StackAlignment = Subtarget.getFrameLowering()->getStackAlignment(); + if (MFI->getMaxAlignment() > StackAlignment) { +#ifdef DEBUG + if (!CanRealign) + DEBUG(dbgs() << "It's not possible to realign the stack of the function: " + << F->getName() << "\n"); +#endif + return CanRealign; + } + + return false; +} diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 76e84bd142b9..ee1f6bcd7390 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -57,6 +57,14 @@ public: int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = nullptr) const; + + // Stack realignment queries. + bool canRealignStack(const MachineFunction &MF) const; + + bool needsStackRealignment(const MachineFunction &MF) const override; + /// Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const override; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 19efa59e1fdf..ec7bf314c641 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -382,6 +382,11 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, unsigned FP = ABI.GetFramePtr(); unsigned ZERO = ABI.GetNullPtr(); unsigned ADDu = ABI.GetPtrAdduOp(); + unsigned ADDiu = ABI.GetPtrAddiuOp(); + unsigned AND = ABI.IsN64() ? Mips::AND64 : Mips::AND; + + const TargetRegisterClass *RC = ABI.ArePtrs64bit() ? + &Mips::GPR64RegClass : &Mips::GPR32RegClass; // First, compute final stack size. uint64_t StackSize = MFI->getStackSize(); @@ -464,15 +469,12 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, } if (MipsFI->callsEhReturn()) { - const TargetRegisterClass *PtrRC = - ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; - // Insert instructions that spill eh data registers. for (int I = 0; I < 4; ++I) { if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) MBB.addLiveIn(ABI.GetEhDataReg(I)); TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, - MipsFI->getEhDataRegFI(I), PtrRC, &RegInfo); + MipsFI->getEhDataRegFI(I), RC, &RegInfo); } // Emit .cfi_offset directives for eh data registers. 
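Taken together, the canRealignStack/needsStackRealignment additions above form a small decision procedure: realignment is requested only when the function carries an explicit stack-alignment attribute or allocates a frame object more aligned than the ABI stack, and it is honored only when $fp can be reserved (plus $s7 as a base pointer when variable-sized objects are present). The following is a minimal standalone sketch of that combined logic; the struct fields are illustrative stand-ins for the corresponding LLVM queries, not part of the patch.

// Sketch only: condenses the decision added in MipsRegisterInfo.cpp above.
#include <cstdio>

struct FnTraits {
  bool hasStandardEncoding;  // microMIPS/MIPS16 are excluded
  bool canReserveFP;         // frame pointer register reservable
  bool hasReservedCallFrame; // max call frame known, no dynamic allocas
  bool canReserveBP;         // $s7 reservable as base pointer
  bool noRealignAttr;        // function carries "no-realign-stack"
  bool stackAlignAttr;       // function carries Attribute::StackAlignment
  unsigned maxObjectAlign;   // largest frame-object alignment
  unsigned abiStackAlign;    // ABI-mandated stack alignment
};

static bool canRealignStack(const FnTraits &F) {
  if (!F.hasStandardEncoding || !F.canReserveFP)
    return false;
  // Either the call frame is reserved, or $s7 must be free to act as BP.
  return F.hasReservedCallFrame || F.canReserveBP;
}

static bool needsStackRealignment(const FnTraits &F) {
  if (F.noRealignAttr)
    return false; // explicit opt-out wins
  if (F.stackAlignAttr || F.maxObjectAlign > F.abiStackAlign)
    return canRealignStack(F);
  return false;
}

int main() {
  FnTraits F{true, true, false, true, false, false, 32, 8};
  std::printf("realign: %d\n", needsStackRealignment(F)); // prints 1
}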
@@ -497,6 +499,26 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, nullptr, MRI->getDwarfRegNum(FP, true))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); + + if (RegInfo.needsStackRealignment(MF)) { + // addiu $Reg, $zero, -MaxAlignment + // andi $sp, $sp, $Reg + unsigned VR = MF.getRegInfo().createVirtualRegister(RC); + assert(isInt<16>(MFI->getMaxAlignment()) && + "Function's alignment size requirement is not supported."); + int MaxAlign = - (signed) MFI->getMaxAlignment(); + + BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR).addReg(ZERO) .addImm(MaxAlign); + BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); + + if (hasBP(MF)) { + // move $s7, $sp + unsigned BP = STI.isABI_N64() ? Mips::S7_64 : Mips::S7; + BuildMI(MBB, MBBI, dl, TII.get(ADDu), BP) + .addReg(SP) + .addReg(ZERO); + } + } } } @@ -606,10 +628,14 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); MipsABIInfo ABI = STI.getABI(); unsigned FP = ABI.GetFramePtr(); + unsigned BP = ABI.IsN64() ? Mips::S7_64 : Mips::S7; // Mark $fp as used if function has dedicated frame pointer. if (hasFP(MF)) MRI.setPhysRegUsed(FP); + // Mark $s7 as used if function has dedicated base pointer. + if (hasBP(MF)) + MRI.setPhysRegUsed(BP); // Create spill slots for eh data registers if function calls eh_return. if (MipsFI->callsEhReturn()) diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 8c74a98ecca6..132c3a1001ad 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -110,8 +110,11 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MachineFunction &MF = *MI.getParent()->getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + MipsABIInfo ABI = static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI(); + const MipsRegisterInfo *RegInfo = + static_cast<const MipsRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); int MinCSFI = 0; @@ -135,7 +138,14 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI) FrameReg = ABI.GetStackPtr(); - else + else if (RegInfo->needsStackRealignment(MF)) { + if (MFI->hasVarSizedObjects() && !MFI->isFixedObjectIndex(FrameIndex)) + FrameReg = ABI.GetBasePtr(); + else if (MFI->isFixedObjectIndex(FrameIndex)) + FrameReg = getFrameRegister(MF); + else + FrameReg = ABI.GetStackPtr(); + } else FrameReg = getFrameRegister(MF); // Calculate final offset. 
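The eliminateFI hunk above effectively chooses among three base registers per frame index. Read as a table, it looks like the sketch below, written under the assumption that getFrameRegister returns $fp when hasFP holds and $sp otherwise; the predicate names are illustrative, not LLVM API.

// Illustrative reduction of the FrameReg selection in eliminateFI above.
#include <cstdio>

enum class Base { SP, FP, BP };

static Base pickFrameBase(bool calleeSavedOrEhFI, bool needsRealign,
                          bool hasVarSizedObjects, bool isFixedFI,
                          bool hasFP) {
  if (calleeSavedOrEhFI)
    return Base::SP;            // callee-saved and EH-data spill slots
  if (needsRealign) {
    if (hasVarSizedObjects && !isFixedFI)
      return Base::BP;          // dynamic area is addressed via $s7
    if (isFixedFI)
      return Base::FP;          // incoming arguments keep $fp
    return Base::SP;            // realigned locals use the aligned $sp
  }
  return hasFP ? Base::FP : Base::SP; // what getFrameRegister(MF) picks
}

int main() {
  // A variable-sized local in a realigned frame goes through the base pointer.
  std::printf("%d\n", static_cast<int>(
      pickFrameBase(false, true, true, false, true))); // 2 == BP
}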
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 22b0c6c6685d..fed06005e9c8 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -72,6 +72,8 @@ public: virtual void emitDirectiveSetNoDsp(); virtual void emitDirectiveSetPop(); virtual void emitDirectiveSetPush(); + virtual void emitDirectiveSetSoftFloat(); + virtual void emitDirectiveSetHardFloat(); // PIC support virtual void emitDirectiveCpLoad(unsigned RegNo); @@ -188,6 +190,8 @@ public: void emitDirectiveSetNoDsp() override; void emitDirectiveSetPop() override; void emitDirectiveSetPush() override; + void emitDirectiveSetSoftFloat() override; + void emitDirectiveSetHardFloat() override; // PIC support void emitDirectiveCpLoad(unsigned RegNo) override; diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp index 3615c146a527..6a65943515bb 100644 --- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp +++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp @@ -23,9 +23,9 @@ extern "C" void LLVMInitializeMipsTargetInfo() { /*HasJIT=*/true> Y(TheMipselTarget, "mipsel", "Mipsel"); RegisterTarget<Triple::mips64, - /*HasJIT=*/false> A(TheMips64Target, "mips64", "Mips64 [experimental]"); + /*HasJIT=*/true> A(TheMips64Target, "mips64", "Mips64 [experimental]"); RegisterTarget<Triple::mips64el, - /*HasJIT=*/false> B(TheMips64elTarget, + /*HasJIT=*/true> B(TheMips64elTarget, "mips64el", "Mips64el [experimental]"); } diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index cdd2f1f5944f..d48a7a9b1fcc 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -20,7 +20,7 @@ set(NVPTXCodeGen_sources NVPTXImageOptimizer.cpp NVPTXInstrInfo.cpp NVPTXLowerAggrCopies.cpp - NVPTXLowerStructArgs.cpp + NVPTXLowerKernelArgs.cpp NVPTXMCExpr.cpp NVPTXPrologEpilogPass.cpp NVPTXRegisterInfo.cpp diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp index ac92df901243..4594c22b8701 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp @@ -85,7 +85,7 @@ void NVPTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << markup("<imm:") << formatImm(Op.getImm()) << markup(">"); } else { assert(Op.isExpr() && "Unknown operand kind in printOperand"); - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index b9df3d18f941..ef36c13b49f1 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -25,8 +25,7 @@ static cl::opt<bool> CompileForDebugging("debug-compile", void NVPTXMCAsmInfo::anchor() {} -NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) { - Triple TheTriple(TT); +NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple) { if (TheTriple.getArch() == Triple::nvptx64) { PointerSize = CalleeSaveStackSlotSize = 8; } diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h index c3242866b177..b432e065c2f4 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h @@ -18,12 +18,12 @@ namespace llvm { class Target; -class StringRef; +class Triple; class NVPTXMCAsmInfo : public MCAsmInfo { virtual void anchor(); public: - explicit NVPTXMCAsmInfo(StringRef TT); + explicit 
NVPTXMCAsmInfo(const Triple &TheTriple); }; } // namespace llvm diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 382525d27a25..477b0bac6ca8 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -69,7 +69,7 @@ ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping); MachineFunctionPass *createNVPTXPrologEpilogPass(); MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); FunctionPass *createNVPTXImageOptimizerPass(); -FunctionPass *createNVPTXLowerStructArgsPass(); +FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM); bool isImageOrSamplerVal(const Value *, const Module *); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 3bbea400e53e..298b992b241f 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -266,7 +266,7 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create( + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), OutContext)); break; case MachineOperand::MO_ExternalSymbol: @@ -283,11 +283,11 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, default: report_fatal_error("Unsupported FP type"); break; case Type::FloatTyID: MCOp = MCOperand::createExpr( - NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext)); + NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext)); break; case Type::DoubleTyID: MCOp = MCOperand::createExpr( - NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext)); + NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext)); break; } break; @@ -334,7 +334,7 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { const MCExpr *Expr; - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, + Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); return MCOperand::createExpr(Expr); } @@ -418,9 +418,8 @@ void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( const MachineBasicBlock &MBB) const { MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); - // TODO: isLoopHeader() should take "const MachineBasicBlock *". // We insert .pragma "nounroll" only to the loop header. - if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB))) + if (!LI.isLoopHeader(&MBB)) return false; // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore, @@ -468,7 +467,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { printReturnValStr(*MF, O); } - O << *CurrentFnSym; + CurrentFnSym->print(O, MAI); emitFunctionParamList(*MF, O); @@ -625,7 +624,8 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { else O << ".func "; printReturnValStr(F, O); - O << *getSymbol(F) << "\n"; + getSymbol(F)->print(O, MAI); + O << "\n"; emitFunctionParamList(F, O); O << ";\n"; } @@ -1172,7 +1172,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, else O << getPTXFundamentalTypeStr(ETy, false); O << " "; - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); // Ptx allows variable initilization only for constant and global state // spaces. 
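A pattern recurs throughout the printer changes above and below: every "O << *Sym" becomes "Sym->print(O, MAI)", threading the MCAsmInfo through so the printer can apply dialect-specific name-quoting rules. The toy classes below illustrate why the extra argument is needed; they are hypothetical stand-ins, not the LLVM MCSymbol/MCAsmInfo.

#include <iostream>
#include <string>

// Hypothetical stand-ins to show the pattern: printing a symbol needs the
// asm info so names can be quoted per the target's assembler dialect.
struct AsmInfo { bool quoteNames; };

struct Symbol {
  std::string name;
  void print(std::ostream &os, const AsmInfo *mai) const {
    if (mai && mai->quoteNames)
      os << '"' << name << '"';
    else
      os << name;
  }
};

int main() {
  AsmInfo mai{true};
  Symbol s{"__call_stub_fp_foo"};
  s.print(std::cout, &mai); // instead of: std::cout << s;
  std::cout << '\n';
}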
@@ -1189,11 +1189,9 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, // The frontend adds zero-initializer to variables that don't have an // initial value, so skip warning for this case. if (!GVar->getInitializer()->isNullValue()) { - std::string warnMsg = - ("initial value of '" + GVar->getName() + - "' is not allowed in addrspace(" + - Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str(); - report_fatal_error(warnMsg.c_str()); + report_fatal_error("initial value of '" + GVar->getName() + + "' is not allowed in addrspace(" + + Twine(PTy->getAddressSpace()) + ")"); } } } @@ -1220,15 +1218,21 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, bufferAggregateConstant(Initializer, &aggBuffer); if (aggBuffer.numSymbols) { if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { - O << " .u64 " << *getSymbol(GVar) << "["; + O << " .u64 "; + getSymbol(GVar)->print(O, MAI); + O << "["; O << ElementSize / 8; } else { - O << " .u32 " << *getSymbol(GVar) << "["; + O << " .u32 "; + getSymbol(GVar)->print(O, MAI); + O << "["; O << ElementSize / 4; } O << "]"; } else { - O << " .b8 " << *getSymbol(GVar) << "["; + O << " .b8 "; + getSymbol(GVar)->print(O, MAI); + O << "["; O << ElementSize; O << "]"; } @@ -1236,7 +1240,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, aggBuffer.print(); O << "}"; } else { - O << " .b8 " << *getSymbol(GVar); + O << " .b8 "; + getSymbol(GVar)->print(O, MAI); if (ElementSize) { O << "["; O << ElementSize; @@ -1244,7 +1249,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, } } } else { - O << " .b8 " << *getSymbol(GVar); + O << " .b8 "; + getSymbol(GVar)->print(O, MAI); if (ElementSize) { O << "["; O << ElementSize; @@ -1351,7 +1357,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, O << " ."; O << getPTXFundamentalTypeStr(ETy); O << " "; - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); return; } @@ -1366,9 +1372,11 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, case Type::ArrayTyID: case Type::VectorTyID: ElementSize = TD->getTypeStoreSize(ETy); - O << " .b8 " << *getSymbol(GVar) << "["; + O << " .b8 "; + getSymbol(GVar)->print(O, MAI); + O << "["; if (ElementSize) { - O << itostr(ElementSize); + O << ElementSize; } O << "]"; break; @@ -1408,11 +1416,13 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O) { - O << *getSymbol(I->getParent()) << "_param_" << paramIndex; + getSymbol(I->getParent())->print(O, MAI); + O << "_param_" << paramIndex; } void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { - O << *CurrentFnSym << "_param_" << paramIndex; + CurrentFnSym->print(O, MAI); + O << "_param_" << paramIndex; } void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { @@ -1446,21 +1456,24 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { O << "\t.param .u64 .ptr .surfref "; else O << "\t.param .surfref "; - O << *CurrentFnSym << "_param_" << paramIndex; + CurrentFnSym->print(O, MAI); + O << "_param_" << paramIndex; } else { // Default image is read_only if (nvptxSubtarget->hasImageHandles()) O << "\t.param .u64 .ptr .texref "; else O << "\t.param .texref "; - O << *CurrentFnSym << "_param_" << paramIndex; + CurrentFnSym->print(O, MAI); + O << "_param_" << paramIndex; } } else { if (nvptxSubtarget->hasImageHandles()) 
O << "\t.param .u64 .ptr .samplerref "; else O << "\t.param .samplerref "; - O << *CurrentFnSym << "_param_" << paramIndex; + CurrentFnSym->print(O, MAI); + O << "_param_" << paramIndex; } continue; } @@ -1716,10 +1729,10 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { } if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) { O << "generic("; - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); O << ")"; } else { - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); } return; } @@ -1733,20 +1746,44 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { O << "generic("; - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); O << ")"; } else { - O << *getSymbol(GVar); + getSymbol(GVar)->print(O, MAI); } return; } else { - O << *lowerConstant(CPV); + lowerConstant(CPV)->print(O, MAI); return; } } llvm_unreachable("Not scalar type found in printScalarConstant()"); } +// These utility functions assure we get the right sequence of bytes for a given +// type even for big-endian machines +template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) { + int64_t vp = (int64_t)val; + for (unsigned i = 0; i < sizeof(T); ++i) { + p[i] = (unsigned char)vp; + vp >>= 8; + } +} +static void ConvertFloatToBytes(unsigned char *p, float val) { + int32_t *vp = (int32_t *)&val; + for (unsigned i = 0; i < sizeof(int32_t); ++i) { + p[i] = (unsigned char)*vp; + *vp >>= 8; + } +} +static void ConvertDoubleToBytes(unsigned char *p, double val) { + int64_t *vp = (int64_t *)&val; + for (unsigned i = 0; i < sizeof(int64_t); ++i) { + p[i] = (unsigned char)*vp; + *vp >>= 8; + } +} + void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer) { @@ -1760,30 +1797,30 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, return; } - unsigned char *ptr; + unsigned char ptr[8]; switch (CPV->getType()->getTypeID()) { case Type::IntegerTyID: { const Type *ETy = CPV->getType(); if (ETy == Type::getInt8Ty(CPV->getContext())) { unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue(); - ptr = &c; + ConvertIntToBytes<>(ptr, c); aggBuffer->addBytes(ptr, 1, Bytes); } else if (ETy == Type::getInt16Ty(CPV->getContext())) { short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue(); - ptr = (unsigned char *)&int16; + ConvertIntToBytes<>(ptr, int16); aggBuffer->addBytes(ptr, 2, Bytes); } else if (ETy == Type::getInt32Ty(CPV->getContext())) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { int int32 = (int)(constInt->getZExtValue()); - ptr = (unsigned char *)&int32; + ConvertIntToBytes<>(ptr, int32); aggBuffer->addBytes(ptr, 4, Bytes); break; } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>( ConstantFoldConstantExpression(Cexpr, *TD))) { int int32 = (int)(constInt->getZExtValue()); - ptr = (unsigned char *)&int32; + ConvertIntToBytes<>(ptr, int32); aggBuffer->addBytes(ptr, 4, Bytes); break; } @@ -1798,14 +1835,14 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, } else if (ETy == Type::getInt64Ty(CPV->getContext())) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { long long int64 = (long long)(constInt->getZExtValue()); - ptr = (unsigned char *)&int64; + ConvertIntToBytes<>(ptr, int64); aggBuffer->addBytes(ptr, 8, Bytes); break; 
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>( ConstantFoldConstantExpression(Cexpr, *TD))) { long long int64 = (long long)(constInt->getZExtValue()); - ptr = (unsigned char *)&int64; + ConvertIntToBytes<>(ptr, int64); aggBuffer->addBytes(ptr, 8, Bytes); break; } @@ -1827,11 +1864,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, const Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { float float32 = (float) CFP->getValueAPF().convertToFloat(); - ptr = (unsigned char *)&float32; + ConvertFloatToBytes(ptr, float32); aggBuffer->addBytes(ptr, 4, Bytes); } else if (Ty == Type::getDoubleTy(CPV->getContext())) { double float64 = CFP->getValueAPF().convertToDouble(); - ptr = (unsigned char *)&float64; + ConvertDoubleToBytes(ptr, float64); aggBuffer->addBytes(ptr, 8, Bytes); } else { llvm_unreachable("unsupported fp const type"); @@ -1993,16 +2030,16 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) MCContext &Ctx = OutContext; if (CV->isNullValue() || isa<UndefValue>(CV)) - return MCConstantExpr::Create(0, Ctx); + return MCConstantExpr::create(0, Ctx); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) - return MCConstantExpr::Create(CI->getZExtValue(), Ctx); + return MCConstantExpr::create(CI->getZExtValue(), Ctx); if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { const MCSymbolRefExpr *Expr = - MCSymbolRefExpr::Create(getSymbol(GV), Ctx); + MCSymbolRefExpr::create(getSymbol(GV), Ctx); if (ProcessingGeneric) { - return NVPTXGenericMCSymbolRefExpr::Create(Expr, Ctx); + return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx); } else { return Expr; } @@ -2059,7 +2096,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) return Base; int64_t Offset = OffsetAI.getSExtValue(); - return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), + return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx), Ctx); } @@ -2102,8 +2139,8 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) // the high bits so we are sure to get a proper truncation if the input is // a constant expr. 
unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); - const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); - return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); + const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx); + return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); } // The MC library also has a right-shift operator, but it isn't consistently @@ -2113,7 +2150,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric); switch (CE->getOpcode()) { default: llvm_unreachable("Unknown binary operator constant cast expr"); - case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); + case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); } } } @@ -2123,7 +2160,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) { switch (Expr.getKind()) { case MCExpr::Target: - return cast<MCTargetExpr>(&Expr)->PrintImpl(OS); + return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI); case MCExpr::Constant: OS << cast<MCConstantExpr>(Expr).getValue(); return; @@ -2131,7 +2168,7 @@ void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) { case MCExpr::SymbolRef: { const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr); const MCSymbol &Sym = SRE.getSymbol(); - OS << Sym; + Sym.print(OS, MAI); return; } @@ -2256,11 +2293,11 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; case MachineOperand::MO_GlobalAddress: - O << *getSymbol(MO.getGlobal()); + getSymbol(MO.getGlobal())->print(O, MAI); break; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; default: diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 301c68609a29..f6f7685e76f9 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -165,10 +165,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { } if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { O << "generic("; - O << *Name; + Name->print(O, AP.MAI); O << ")"; } else { - O << *Name; + Name->print(O, AP.MAI); } } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) { const MCExpr *Expr = diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index ae63caec1320..cfff0019b8d9 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -10,34 +10,54 @@ // When a load/store accesses the generic address space, checks whether the // address is casted from a non-generic address space. If so, remove this // addrspacecast because accessing non-generic address spaces is typically -// faster. Besides seeking addrspacecasts, this optimization also traces into -// the base pointer of a GEP. +// faster. Besides removing addrspacecasts directly used by loads/stores, this +// optimization also recursively traces into a GEP's pointer operand and a +// bitcast's source to find more eliminable addrspacecasts. // // For instance, the code below loads a float from an array allocated in // addrspace(3). 
// -// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* -// %1 = gep [10 x float]* %0, i64 0, i64 %i -// %2 = load float* %1 ; emits ld.f32 +// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* +// %1 = gep [10 x float]* %0, i64 0, i64 %i +// %2 = bitcast float* %1 to i32* +// %3 = load i32* %2 ; emits ld.u32 // -// First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast -// and the GEP to expose more optimization opportunities to function +// First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP, +// and the bitcast to expose more optimization opportunities to function // optimizeMemoryInst. The intermediate code looks like: // -// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i -// %1 = addrspacecast float addrspace(3)* %0 to float* -// %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly +// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i +// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)* +// %2 = addrspacecast i32 addrspace(3)* %1 to i32* +// %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly // // Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed // generic pointers, and folds the load and the addrspacecast into a load from // the original address space. The final code looks like: // -// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i -// %2 = load float addrspace(3)* %0 ; emits ld.shared.f32 +// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i +// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)* +// %3 = load i32 addrspace(3)* %1 ; emits ld.shared.u32 // // This pass may remove an addrspacecast in a different BB. Therefore, we // implement it as a FunctionPass. // +// TODO: +// The current implementation doesn't handle PHINodes. Eliminating +// addrspacecasts used by PHINodes is trickier because PHINodes can introduce +// loops in data flow. For example, +// +// %generic.input = addrspacecast float addrspace(3)* %input to float* +// loop: +// %y = phi [ %generic.input, %y2 ] +// %y2 = getelementptr %y, 1 +// %v = load %y2 +// br ..., label %loop, ... +// +// Marking %y2 shared depends on marking %y shared, but %y also data-flow +// depends on %y2. We probably need an iterative fix-point algorithm to handle +// this case. + //===----------------------------------------------------------------------===// #include "NVPTX.h" @@ -62,17 +82,31 @@ class NVPTXFavorNonGenericAddrSpaces : public FunctionPass { public: static char ID; NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {} - bool runOnFunction(Function &F) override; +private: /// Optimizes load/store instructions. Idx is the index of the pointer operand /// (0 for load, and 1 for store). Returns true if it changes anything. bool optimizeMemoryInstruction(Instruction *I, unsigned Idx); + /// Recursively traces into a GEP's pointer operand or a bitcast's source to + /// find an eliminable addrspacecast, and hoists that addrspacecast to the + /// outermost level. For example, this function transforms + /// bitcast(gep(gep(addrspacecast(X)))) + /// to + /// addrspacecast(bitcast(gep(gep(X)))). + /// + /// This reordering exposes to optimizeMemoryInstruction more + /// optimization opportunities on loads and stores. + /// + /// Returns true if this function successfully hoists an eliminable + /// addrspacecast or V is already such an addrspacecast. /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X, - /// indices)". 
This reordering exposes to optimizeMemoryInstruction more - /// optimization opportunities on loads and stores. Returns true if it changes - /// the program. - bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP); + /// indices)". + bool hoistAddrSpaceCastFrom(Value *V, int Depth = 0); + /// Helper function for GEPs. + bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth); + /// Helper function for bitcasts. + bool hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth); }; } @@ -85,11 +119,12 @@ INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic", "Remove unnecessary non-generic-to-generic addrspacecasts", false, false) -// Decides whether removing Cast is valid and beneficial. Cast can be an -// instruction or a constant expression. -static bool IsEliminableAddrSpaceCast(Operator *Cast) { - // Returns false if not even an addrspacecast. - if (Cast->getOpcode() != Instruction::AddrSpaceCast) +// Decides whether V is an addrspacecast and shortcutting V in load/store is +// valid and beneficial. +static bool isEliminableAddrSpaceCast(Value *V) { + // Returns false if V is not even an addrspacecast. + Operator *Cast = dyn_cast<Operator>(V); + if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast) return false; Value *Src = Cast->getOperand(0); @@ -108,67 +143,119 @@ static bool IsEliminableAddrSpaceCast(Operator *Cast) { DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC); } -bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( - GEPOperator *GEP) { - Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand()); - if (!Cast) +bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(GEPOperator *GEP, + int Depth) { + if (!hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1)) return false; - if (!IsEliminableAddrSpaceCast(Cast)) - return false; + // That hoistAddrSpaceCastFrom succeeds implies GEP's pointer operand is now + // an eliminable addrspacecast. + assert(isEliminableAddrSpaceCast(GEP->getPointerOperand())); + Operator *Cast = cast<Operator>(GEP->getPointerOperand()); SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) { - // %1 = gep (addrspacecast X), indices + // GEP = gep (addrspacecast X), indices // => - // %0 = gep X, indices - // %1 = addrspacecast %0 - GetElementPtrInst *NewGEPI = GetElementPtrInst::Create( + // NewGEP = gep X, indices + // NewASC = addrspacecast NewGEP + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( GEP->getSourceElementType(), Cast->getOperand(0), Indices, - GEP->getName(), GEPI); - NewGEPI->setIsInBounds(GEP->isInBounds()); - GEP->replaceAllUsesWith( - new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI)); + "", GEPI); + NewGEP->setIsInBounds(GEP->isInBounds()); + Value *NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI); + NewASC->takeName(GEP); + GEP->replaceAllUsesWith(NewASC); } else { // GEP is a constant expression. 
- Constant *NewGEPCE = ConstantExpr::getGetElementPtr( + Constant *NewGEP = ConstantExpr::getGetElementPtr( GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)), Indices, GEP->isInBounds()); GEP->replaceAllUsesWith( - ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType())); + ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType())); } return true; } -bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, - unsigned Idx) { - // If the pointer operand is a GEP, hoist the addrspacecast if any from the - // GEP to expose more optimization opportunites. - if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) { - hoistAddrSpaceCastFromGEP(GEP); - } +bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast( + BitCastOperator *BC, int Depth) { + if (!hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1)) + return false; - // load/store (addrspacecast X) => load/store X if shortcutting the - // addrspacecast is valid and can improve performance. - // - // e.g., - // %1 = addrspacecast float addrspace(3)* %0 to float* - // %2 = load float* %1 - // -> - // %2 = load float addrspace(3)* %0 - // - // Note: the addrspacecast can also be a constant expression. - if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) { - if (IsEliminableAddrSpaceCast(Cast)) { - MI->setOperand(Idx, Cast->getOperand(0)); - return true; - } + // That hoistAddrSpaceCastFrom succeeds implies BC's source operand is now + // an eliminable addrspacecast. + assert(isEliminableAddrSpaceCast(BC->getOperand(0))); + Operator *Cast = cast<Operator>(BC->getOperand(0)); + + // Cast = addrspacecast Src + // BC = bitcast Cast + // => + // Cast' = bitcast Src + // BC' = addrspacecast Cast' + Value *Src = Cast->getOperand(0); + Type *TypeOfNewCast = + PointerType::get(BC->getType()->getPointerElementType(), + Src->getType()->getPointerAddressSpace()); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) { + Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI); + Value *NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI); + NewBC->takeName(BC); + BC->replaceAllUsesWith(NewBC); + } else { + // BC is a constant expression. + Constant *NewCast = + ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast); + Constant *NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType()); + BC->replaceAllUsesWith(NewBC); } + return true; +} + +bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V, + int Depth) { + // Returns true if V is already an eliminable addrspacecast. + if (isEliminableAddrSpaceCast(V)) + return true; + + // Limit the depth to prevent this recursive function from running too long. + const int MaxDepth = 20; + if (Depth >= MaxDepth) + return false; + + // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer + // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts + // that are not directly used by the load/store. + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) + return hoistAddrSpaceCastFromGEP(GEP, Depth); + + if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) + return hoistAddrSpaceCastFromBitCast(BC, Depth); return false; } +bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, + unsigned Idx) { + if (hoistAddrSpaceCastFrom(MI->getOperand(Idx))) { + // load/store (addrspacecast X) => load/store X if shortcutting the + // addrspacecast is valid and can improve performance. 
+ // + // e.g., + // %1 = addrspacecast float addrspace(3)* %0 to float* + // %2 = load float* %1 + // -> + // %2 = load float addrspace(3)* %0 + // + // Note: the addrspacecast can also be a constant expression. + assert(isEliminableAddrSpaceCast(MI->getOperand(Idx))); + Operator *ASC = dyn_cast<Operator>(MI->getOperand(Idx)); + MI->setOperand(Idx, ASC->getOperand(0)); + return true; + } + return false; +} + bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { if (DisableFavorNonGeneric) return false; diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index fa38a686fcbf..232a611d1760 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -613,6 +613,10 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { Opc = TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes; break; + case ADDRESS_SPACE_PARAM: + Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64 + : NVPTX::nvvm_ptr_gen_to_param; + break; } return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src); } diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 805847a581fa..b5af72ab855a 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3725,7 +3725,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( /// (LoopStrengthReduce.cpp) and memory optimization for address mode /// (CodeGenPrepare.cpp) bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // AddrMode - This represents an addressing mode of: // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 5142ae3cd88f..ed94775b3002 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -456,7 +456,8 @@ public: /// Used to guide target specific optimizations, like loop strength /// reduction (LoopStrengthReduce.cpp) and memory optimization for /// address mode (CodeGenPrepare.cpp) - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned getFunctionAlignment(const Function *F) const; @@ -497,12 +498,6 @@ public: std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { - // FIXME: Map different constraints differently. - return InlineAsm::Constraint_m; - } - const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts diff --git a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp new file mode 100644 index 000000000000..24dcb122b94e --- /dev/null +++ b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp @@ -0,0 +1,170 @@ +//===-- NVPTXLowerKernelArgs.cpp - Lower kernel arguments -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pointer arguments to kernel functions need to be lowered specially. +// +// 1. Copy byval struct args to local memory. 
This is a preparation for handling
+//    cases like
+//
+//     kernel void foo(struct A arg, ...)
+//     {
+//         struct A *p = &arg;
+//         ...
+//         ... = p->field1 ...   (this is no generic address for .param)
+//         p->field2 = ...       (this is no write access to .param)
+//     }
+//
+// 2. Convert non-byval pointer arguments of CUDA kernels to pointers in the
+//    global address space. This allows later optimizations to emit
+//    ld.global.*/st.global.* for accessing these pointer arguments. For
+//    example,
+//
+//    define void @foo(float* %input) {
+//      %v = load float, float* %input, align 4
+//      ...
+//    }
+//
+//    becomes
+//
+//    define void @foo(float* %input) {
+//      %input2 = addrspacecast float* %input to float addrspace(1)*
+//      %input3 = addrspacecast float addrspace(1)* %input2 to float*
+//      %v = load float, float* %input3, align 4
+//      ...
+//    }
+//
+//    Later, NVPTXFavorNonGenericAddrSpaces will optimize it to
+//
+//    define void @foo(float* %input) {
+//      %input2 = addrspacecast float* %input to float addrspace(1)*
+//      %v = load float, float addrspace(1)* %input2, align 4
+//      ...
+//    }
+//
+// TODO: merge this pass with NVPTXFavorNonGenericAddrSpaces so that other passes
+// don't cancel the addrspacecast pair this pass emits.
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
+}
+
+namespace {
+class NVPTXLowerKernelArgs : public FunctionPass {
+  bool runOnFunction(Function &F) override;
+
+  // handle byval parameters
+  void handleByValParam(Argument *);
+  // handle non-byval pointer parameters
+  void handlePointerParam(Argument *);
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  NVPTXLowerKernelArgs(const NVPTXTargetMachine *TM = nullptr)
+      : FunctionPass(ID), TM(TM) {}
+  const char *getPassName() const override {
+    return "Lower pointer arguments of CUDA kernels";
+  }
+
+private:
+  const NVPTXTargetMachine *TM;
+};
+} // namespace
+
+char NVPTXLowerKernelArgs::ID = 1;
+
+INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args",
+                "Lower kernel arguments (NVPTX)", false, false)
+
+// =============================================================================
+// If the function had a byval struct ptr arg, say foo(%struct.x *byval %d),
+// then add the following instructions to the first basic block:
+//
+// %temp = alloca %struct.x, align 8
+// %tempd = addrspacecast %struct.x* %d to %struct.x addrspace(101)*
+// %tv = load %struct.x addrspace(101)* %tempd
+// store %struct.x %tv, %struct.x* %temp, align 8
+//
+// The above code allocates some space in the stack and copies the incoming
+// struct from param space to local space.
+// Then replace all occurrences of %d by %temp.
+// =============================================================================
+void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
+  Function *Func = Arg->getParent();
+  Instruction *FirstInst = &(Func->getEntryBlock().front());
+  PointerType *PType = dyn_cast<PointerType>(Arg->getType());
+
+  assert(PType && "Expecting pointer type in handleByValParam");
+
+  Type *StructType = PType->getElementType();
+  AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
+  // Set the alignment to alignment of the byval parameter. This is because,
+  // later load/stores assume that alignment, and we are going to replace
+  // the use of the byval parameter with this alloca instruction.
+  AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
+  Arg->replaceAllUsesWith(AllocA);
+
+  Value *ArgInParam = new AddrSpaceCastInst(
+      Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(),
+      FirstInst);
+  LoadInst *LI = new LoadInst(ArgInParam, Arg->getName(), FirstInst);
+  new StoreInst(LI, AllocA, FirstInst);
+}
+
+void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) {
+  assert(!Arg->hasByValAttr() &&
+         "byval params should be handled by handleByValParam");
+
+  Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin();
+  Instruction *ArgInGlobal = new AddrSpaceCastInst(
+      Arg, PointerType::get(Arg->getType()->getPointerElementType(),
+                            ADDRESS_SPACE_GLOBAL),
+      Arg->getName(), FirstInst);
+  Value *ArgInGeneric = new AddrSpaceCastInst(ArgInGlobal, Arg->getType(),
+                                              Arg->getName(), FirstInst);
+  // Replace with ArgInGeneric all uses of Arg except ArgInGlobal.
+  Arg->replaceAllUsesWith(ArgInGeneric);
+  ArgInGlobal->setOperand(0, Arg);
+}
+
+
+// =============================================================================
+// Main function for this pass.
+// =============================================================================
+bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
+  // Skip non-kernels. See the comments at the top of this file.
+  if (!isKernelFunction(F))
+    return false;
+
+  for (Argument &Arg : F.args()) {
+    if (Arg.getType()->isPointerTy()) {
+      if (Arg.hasByValAttr())
+        handleByValParam(&Arg);
+      else if (TM && TM->getDrvInterface() == NVPTX::CUDA)
+        handlePointerParam(&Arg);
+    }
+  }
+  return true;
+}
+
+FunctionPass *
+llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) {
+  return new NVPTXLowerKernelArgs(TM);
+}
diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
deleted file mode 100644
index 68dfbb716139..000000000000
--- a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory =====--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Copy struct args to local memory. This is needed for kernel functions only.
-// This is a preparation for handling cases like
-//
-// kernel void foo(struct A arg, ...)
-// {
-//     struct A *p = &arg;
-//     ...
-//     ... = p->filed1 ...   (this is no generic address for .param)
-//     p->filed2 = ...
(this is no write access to .param) -// } -// -//===----------------------------------------------------------------------===// - -#include "NVPTX.h" -#include "NVPTXUtilities.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/Pass.h" - -using namespace llvm; - -namespace llvm { -void initializeNVPTXLowerStructArgsPass(PassRegistry &); -} - -namespace { -class NVPTXLowerStructArgs : public FunctionPass { - bool runOnFunction(Function &F) override; - - void handleStructPtrArgs(Function &); - void handleParam(Argument *); - -public: - static char ID; // Pass identification, replacement for typeid - NVPTXLowerStructArgs() : FunctionPass(ID) {} - const char *getPassName() const override { - return "Copy structure (byval *) arguments to stack"; - } -}; -} // namespace - -char NVPTXLowerStructArgs::ID = 1; - -INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args", - "Lower structure arguments (NVPTX)", false, false) - -void NVPTXLowerStructArgs::handleParam(Argument *Arg) { - Function *Func = Arg->getParent(); - Instruction *FirstInst = &(Func->getEntryBlock().front()); - PointerType *PType = dyn_cast<PointerType>(Arg->getType()); - - assert(PType && "Expecting pointer type in handleParam"); - - Type *StructType = PType->getElementType(); - AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst); - - /* Set the alignment to alignment of the byval parameter. This is because, - * later load/stores assume that alignment, and we are going to replace - * the use of the byval parameter with this alloca instruction. - */ - AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1)); - - Arg->replaceAllUsesWith(AllocA); - - // Get the cvt.gen.to.param intrinsic - Type *CvtTypes[] = { - Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM), - Type::getInt8PtrTy(Func->getParent()->getContext(), - ADDRESS_SPACE_GENERIC)}; - Function *CvtFunc = Intrinsic::getDeclaration( - Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param, CvtTypes); - - Value *BitcastArgs[] = { - new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(), - ADDRESS_SPACE_GENERIC), - Arg->getName(), FirstInst)}; - CallInst *CallCVT = - CallInst::Create(CvtFunc, BitcastArgs, "cvt_to_param", FirstInst); - - BitCastInst *BitCast = new BitCastInst( - CallCVT, PointerType::get(StructType, ADDRESS_SPACE_PARAM), - Arg->getName(), FirstInst); - LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst); - new StoreInst(LI, AllocA, FirstInst); -} - -// ============================================================================= -// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then -// add the following instructions to the first basic block : -// -// %temp = alloca %struct.x, align 8 -// %tt1 = bitcast %struct.x * %d to i8 * -// %tt2 = llvm.nvvm.cvt.gen.to.param %tt2 -// %tempd = bitcast i8 addrspace(101) * to %struct.x addrspace(101) * -// %tv = load %struct.x addrspace(101) * %tempd -// store %struct.x %tv, %struct.x * %temp, align 8 -// -// The above code allocates some space in the stack and copies the incoming -// struct from param space to local space. -// Then replace all occurences of %d by %temp. 
-// ============================================================================= -void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) { - for (Argument &Arg : F.args()) { - if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) { - handleParam(&Arg); - } - } -} - -// ============================================================================= -// Main function for this pass. -// ============================================================================= -bool NVPTXLowerStructArgs::runOnFunction(Function &F) { - // Skip non-kernels. See the comments at the top of this file. - if (!isKernelFunction(F)) - return false; - - handleStructPtrArgs(F); - return true; -} - -FunctionPass *llvm::createNVPTXLowerStructArgsPass() { - return new NVPTXLowerStructArgs(); -} diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp index 779b65ecc39f..3c98b9febf85 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.cpp +++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp @@ -16,11 +16,11 @@ using namespace llvm; #define DEBUG_TYPE "nvptx-mcexpr" const NVPTXFloatMCExpr* -NVPTXFloatMCExpr::Create(VariantKind Kind, APFloat Flt, MCContext &Ctx) { +NVPTXFloatMCExpr::create(VariantKind Kind, APFloat Flt, MCContext &Ctx) { return new (Ctx) NVPTXFloatMCExpr(Kind, Flt); } -void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const { +void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { bool Ignored; unsigned NumHex; APFloat APF = getAPFloat(); @@ -47,11 +47,14 @@ void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const { } const NVPTXGenericMCSymbolRefExpr* -NVPTXGenericMCSymbolRefExpr::Create(const MCSymbolRefExpr *SymExpr, +NVPTXGenericMCSymbolRefExpr::create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx) { return new (Ctx) NVPTXGenericMCSymbolRefExpr(SymExpr); } -void NVPTXGenericMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const { - OS << "generic(" << *SymExpr << ")"; +void NVPTXGenericMCSymbolRefExpr::printImpl(raw_ostream &OS, + const MCAsmInfo *MAI) const { + OS << "generic("; + SymExpr->print(OS, MAI); + OS << ")"; } diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h index 8c6b219abd13..46b4b33e7e40 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/lib/Target/NVPTX/NVPTXMCExpr.h @@ -36,17 +36,17 @@ public: /// @name Construction /// @{ - static const NVPTXFloatMCExpr *Create(VariantKind Kind, APFloat Flt, + static const NVPTXFloatMCExpr *create(VariantKind Kind, APFloat Flt, MCContext &Ctx); - static const NVPTXFloatMCExpr *CreateConstantFPSingle(APFloat Flt, + static const NVPTXFloatMCExpr *createConstantFPSingle(APFloat Flt, MCContext &Ctx) { - return Create(VK_NVPTX_SINGLE_PREC_FLOAT, Flt, Ctx); + return create(VK_NVPTX_SINGLE_PREC_FLOAT, Flt, Ctx); } - static const NVPTXFloatMCExpr *CreateConstantFPDouble(APFloat Flt, + static const NVPTXFloatMCExpr *createConstantFPDouble(APFloat Flt, MCContext &Ctx) { - return Create(VK_NVPTX_DOUBLE_PREC_FLOAT, Flt, Ctx); + return create(VK_NVPTX_DOUBLE_PREC_FLOAT, Flt, Ctx); } /// @} @@ -61,14 +61,14 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override { return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; - MCSection *FindAssociatedSection() const override { return nullptr; } + MCSection *findAssociatedSection() const override { return 
nullptr; } // There are no TLS NVPTXMCExprs at the moment. void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} @@ -92,7 +92,7 @@ public: /// @{ static const NVPTXGenericMCSymbolRefExpr - *Create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx); + *create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx); /// @} /// @name Accessors @@ -103,14 +103,14 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override { return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; - MCSection *FindAssociatedSection() const override { return nullptr; } + MCSection *findAssociatedSection() const override { return nullptr; } // There are no TLS NVPTXMCExprs at the moment. void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index ac27c30aabab..a6466687bc7b 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -53,7 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&); void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); -void initializeNVPTXLowerStructArgsPass(PassRegistry &); +void initializeNVPTXLowerKernelArgsPass(PassRegistry &); } extern "C" void LLVMInitializeNVPTXTarget() { @@ -69,7 +69,7 @@ extern "C" void LLVMInitializeNVPTXTarget() { initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); initializeNVPTXFavorNonGenericAddrSpacesPass( *PassRegistry::getPassRegistry()); - initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry()); + initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry()); } static std::string computeDataLayout(bool is64Bit) { @@ -163,7 +163,13 @@ void NVPTXPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); addPass(createNVPTXAssignValidGlobalNamesPass()); addPass(createGenericToNVVMPass()); + addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); addPass(createNVPTXFavorNonGenericAddrSpacesPass()); + // NVPTXLowerKernelArgs emits alloca for byval parameters which can often + // be eliminated by SROA. We do not run SROA right after NVPTXLowerKernelArgs + // because we plan to merge NVPTXLowerKernelArgs and + // NVPTXFavorNonGenericAddrSpaces into one pass. + addPass(createSROAPass()); // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave // them unused. We could remove dead code in an ad-hoc manner, but that // requires manual work and might be error-prone. @@ -181,6 +187,9 @@ void NVPTXPassConfig::addIRPasses() { addPass(createEarlyCSEPass()); // Run NaryReassociate after EarlyCSE/GVN to be more effective. addPass(createNaryReassociatePass()); + // NaryReassociate on GEPs creates redundant common expressions, so run + // EarlyCSE after it. 
+ addPass(createEarlyCSEPass()); } bool NVPTXPassConfig::addInstSelector() { diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 83de4d996993..1736d03961f7 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -24,6 +24,7 @@ #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/SourceMgr.h" @@ -772,7 +773,7 @@ public: if (const PPCMCExpr *TE = dyn_cast<PPCMCExpr>(Val)) { int64_t Res; - if (TE->EvaluateAsConstant(Res)) + if (TE->evaluateAsConstant(Res)) return CreateContextImm(Res, S, E, IsPPC64); } @@ -814,13 +815,13 @@ addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) { } } else if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Expr)) { if (BinExpr->getOpcode() == MCBinaryExpr::Sub) { - const MCExpr *NE = MCBinaryExpr::CreateSub(BinExpr->getRHS(), + const MCExpr *NE = MCBinaryExpr::createSub(BinExpr->getRHS(), BinExpr->getLHS(), Ctx); Inst.addOperand(MCOperand::createExpr(NE)); return; } } - Inst.addOperand(MCOperand::createExpr(MCUnaryExpr::CreateMinus(Expr, Ctx))); + Inst.addOperand(MCOperand::createExpr(MCUnaryExpr::createMinus(Expr, Ctx))); } void PPCAsmParser::ProcessInstruction(MCInst &Inst, @@ -1330,7 +1331,7 @@ ExtractModifierFromExpr(const MCExpr *E, return nullptr; } - return MCSymbolRefExpr::Create(&SRE->getSymbol(), Context); + return MCSymbolRefExpr::create(&SRE->getSymbol(), Context); } case MCExpr::Unary: { @@ -1338,7 +1339,7 @@ ExtractModifierFromExpr(const MCExpr *E, const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant); if (!Sub) return nullptr; - return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context); + return MCUnaryExpr::create(UE->getOpcode(), Sub, Context); } case MCExpr::Binary: { @@ -1362,7 +1363,7 @@ ExtractModifierFromExpr(const MCExpr *E, else return nullptr; - return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context); + return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context); } } @@ -1396,7 +1397,7 @@ FixupVariantKind(const MCExpr *E) { default: return E; } - return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, Context); + return MCSymbolRefExpr::create(&SRE->getSymbol(), Variant, Context); } case MCExpr::Unary: { @@ -1404,7 +1405,7 @@ FixupVariantKind(const MCExpr *E) { const MCExpr *Sub = FixupVariantKind(UE->getSubExpr()); if (Sub == UE->getSubExpr()) return E; - return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context); + return MCUnaryExpr::create(UE->getOpcode(), Sub, Context); } case MCExpr::Binary: { @@ -1413,7 +1414,7 @@ FixupVariantKind(const MCExpr *E) { const MCExpr *RHS = FixupVariantKind(BE->getRHS()); if (LHS == BE->getLHS() && RHS == BE->getRHS()) return E; - return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context); + return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context); } } @@ -1438,7 +1439,7 @@ ParseExpression(const MCExpr *&EVal) { PPCMCExpr::VariantKind Variant; const MCExpr *E = ExtractModifierFromExpr(EVal, Variant); if (E) - EVal = PPCMCExpr::Create(Variant, E, false, getParser().getContext()); + EVal = PPCMCExpr::create(Variant, E, false, getParser().getContext()); return false; } @@ -1485,7 +1486,7 @@ ParseDarwinExpression(const MCExpr *&EVal) { if (getLexer().isNot(AsmToken::RParen)) return Error(Parser.getTok().getLoc(), "expected ')'"); 
Parser.Lex(); // Eat the ')' - EVal = PPCMCExpr::Create(Variant, EVal, false, getParser().getContext()); + EVal = PPCMCExpr::create(Variant, EVal, false, getParser().getContext()); } return false; } @@ -1863,7 +1864,7 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) { Error(L, "expected identifier in directive"); return false; } - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + MCSymbolELF *Sym = cast<MCSymbolELF>(getContext().getOrCreateSymbol(Name)); if (getLexer().isNot(AsmToken::Comma)) { Error(L, "unexpected token in directive"); @@ -1936,19 +1937,19 @@ PPCAsmParser::applyModifierToExpr(const MCExpr *E, MCContext &Ctx) { switch (Variant) { case MCSymbolRefExpr::VK_PPC_LO: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_LO, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_LO, E, false, Ctx); case MCSymbolRefExpr::VK_PPC_HI: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HI, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_HI, E, false, Ctx); case MCSymbolRefExpr::VK_PPC_HA: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HA, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_HA, E, false, Ctx); case MCSymbolRefExpr::VK_PPC_HIGHER: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx); case MCSymbolRefExpr::VK_PPC_HIGHERA: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx); case MCSymbolRefExpr::VK_PPC_HIGHEST: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx); case MCSymbolRefExpr::VK_PPC_HIGHESTA: - return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx); + return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx); default: return nullptr; } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 1a130e87bf3e..5e1d22789056 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -445,6 +445,6 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 86885e111dd1..72742dc3ee20 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -11,12 +11,12 @@ #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -142,13 +142,14 @@ public: // to resolve the fixup directly. Emit a relocation and leave // resolution of the final target address to the linker. if (const MCSymbolRefExpr *A = Target.getSymA()) { - const MCSymbolData &Data = Asm.getSymbolData(A->getSymbol()); - // The "other" values are stored in the last 6 bits of the second byte. - // The traditional defines for STO values assume the full byte and thus - // the shift to pack it. 
- unsigned Other = MCELF::getOther(Data) << 2; - if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0) - IsResolved = false; + if (const auto *S = dyn_cast<MCSymbolELF>(&A->getSymbol())) { + // The "other" values are stored in the last 6 bits of the second + // byte. The traditional defines for STO values assume the full byte + // and thus the shift to pack it. + unsigned Other = S->getOther() << 2; + if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0) + IsResolved = false; + } } break; } @@ -176,7 +177,7 @@ public: bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override { uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(0x60000000); + OW->write32(0x60000000); OW->WriteZeros(Count % 4); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 3e3489fc46aa..992be5b966c1 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -11,9 +11,9 @@ #include "MCTargetDesc/PPCFixupKinds.h" #include "MCTargetDesc/PPCMCExpr.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" @@ -28,7 +28,7 @@ namespace { unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; - bool needsRelocateWithSymbol(const MCSymbolData &SD, + bool needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const override; }; } @@ -395,7 +395,7 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } -bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, +bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const { switch (Type) { default: @@ -407,7 +407,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, // The "other" values are stored in the last 6 bits of the second byte. // The traditional defines for STO values assume the full byte and thus // the shift to pack it. - unsigned Other = MCELF::getOther(SD) << 2; + unsigned Other = cast<MCSymbolELF>(Sym).getOther() << 2; return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0; } } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 17f4cd421641..95379246f301 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOpcodes.h" @@ -116,38 +117,19 @@ public: switch (Size) { case 4: if (IsLittleEndian) { - OS << (char)(Bits); - OS << (char)(Bits >> 8); - OS << (char)(Bits >> 16); - OS << (char)(Bits >> 24); + support::endian::Writer<support::little>(OS).write<uint32_t>(Bits); } else { - OS << (char)(Bits >> 24); - OS << (char)(Bits >> 16); - OS << (char)(Bits >> 8); - OS << (char)(Bits); + support::endian::Writer<support::big>(OS).write<uint32_t>(Bits); } break; case 8: // If we emit a pair of instructions, the first one is // always in the top 32 bits, even on little-endian. 
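The hunk that follows collapses eight per-byte stream writes into a single endian-aware 64-bit write of the half-swapped value. As a quick standalone check that the byte order is preserved (plain C++ with no LLVM headers; the function name, buffer, and sample value are made up for illustration, not part of the patch):

#include <cstdint>
#include <cstdio>

// Write V in little-endian byte order, as
// support::endian::Writer<support::little>::write<uint64_t> would.
static void writeLE64(uint64_t V, uint8_t Out[8]) {
  for (int I = 0; I < 8; ++I)
    Out[I] = uint8_t(V >> (8 * I)); // least significant byte first
}

int main() {
  // Two packed 4-byte instructions; the first one (0x0A0B0C0D) occupies
  // the top 32 bits, per the comment above.
  uint64_t Bits = 0x0A0B0C0D01020304ULL;
  uint64_t Swapped = (Bits << 32) | (Bits >> 32);
  uint8_t Buf[8];
  writeLE64(Swapped, Buf);
  for (uint8_t B : Buf)
    printf("%02x ", B); // prints: 0d 0c 0b 0a 04 03 02 01
  printf("\n");         // first instruction's bytes first, each half LE,
  return 0;             // matching the removed per-byte writes
}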
if (IsLittleEndian) { - OS << (char)(Bits >> 32); - OS << (char)(Bits >> 40); - OS << (char)(Bits >> 48); - OS << (char)(Bits >> 56); - OS << (char)(Bits); - OS << (char)(Bits >> 8); - OS << (char)(Bits >> 16); - OS << (char)(Bits >> 24); + uint64_t Swapped = (Bits << 32) | (Bits >> 32); + support::endian::Writer<support::little>(OS).write<uint64_t>(Swapped); } else { - OS << (char)(Bits >> 56); - OS << (char)(Bits >> 48); - OS << (char)(Bits >> 40); - OS << (char)(Bits >> 32); - OS << (char)(Bits >> 24); - OS << (char)(Bits >> 16); - OS << (char)(Bits >> 8); - OS << (char)(Bits); + support::endian::Writer<support::big>(OS).write<uint64_t>(Bits); } break; default: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 7204befe15ee..6b97d4c1456b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -19,12 +19,12 @@ using namespace llvm; #define DEBUG_TYPE "ppcmcexpr" const PPCMCExpr* -PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr, +PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr, bool isDarwin, MCContext &Ctx) { return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin); } -void PPCMCExpr::PrintImpl(raw_ostream &OS) const { +void PPCMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { if (isDarwinSyntax()) { switch (Kind) { default: llvm_unreachable("Invalid kind!"); @@ -34,10 +34,10 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const { } OS << '('; - getSubExpr()->print(OS); + getSubExpr()->print(OS, MAI); OS << ')'; } else { - getSubExpr()->print(OS); + getSubExpr()->print(OS, MAI); switch (Kind) { default: llvm_unreachable("Invalid kind!"); @@ -53,21 +53,21 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const { } bool -PPCMCExpr::EvaluateAsConstant(int64_t &Res) const { +PPCMCExpr::evaluateAsConstant(int64_t &Res) const { MCValue Value; - if (!getSubExpr()->EvaluateAsRelocatable(Value, nullptr, nullptr)) + if (!getSubExpr()->evaluateAsRelocatable(Value, nullptr, nullptr)) return false; if (!Value.isAbsolute()) return false; - Res = EvaluateAsInt64(Value.getConstant()); + Res = evaluateAsInt64(Value.getConstant()); return true; } int64_t -PPCMCExpr::EvaluateAsInt64(int64_t Value) const { +PPCMCExpr::evaluateAsInt64(int64_t Value) const { switch (Kind) { case VK_PPC_LO: return Value & 0xffff; @@ -90,16 +90,16 @@ PPCMCExpr::EvaluateAsInt64(int64_t Value) const { } bool -PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, +PPCMCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const { MCValue Value; - if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout, Fixup)) + if (!getSubExpr()->evaluateAsRelocatable(Value, Layout, Fixup)) return false; if (Value.isAbsolute()) { - int64_t Result = EvaluateAsInt64(Value.getConstant()); + int64_t Result = evaluateAsInt64(Value.getConstant()); if ((Fixup == nullptr || (unsigned)Fixup->getKind() != PPC::fixup_ppc_half16) && (Result >= 0x8000)) return false; @@ -138,7 +138,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, Modifier = MCSymbolRefExpr::VK_PPC_HIGHESTA; break; } - Sym = MCSymbolRefExpr::Create(&Sym->getSymbol(), Modifier, Context); + Sym = MCSymbolRefExpr::create(&Sym->getSymbol(), Modifier, Context); Res = MCValue::get(Sym, Value.getSymB(), Value.getConstant()); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index ca72ccf0f76e..a641780516b3 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ 
b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -34,7 +34,7 @@ private: const MCExpr *Expr; bool IsDarwin; - int64_t EvaluateAsInt64(int64_t Value) const; + int64_t evaluateAsInt64(int64_t Value) const; explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin) : Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {} @@ -43,22 +43,22 @@ public: /// @name Construction /// @{ - static const PPCMCExpr *Create(VariantKind Kind, const MCExpr *Expr, + static const PPCMCExpr *create(VariantKind Kind, const MCExpr *Expr, bool isDarwin, MCContext &Ctx); - static const PPCMCExpr *CreateLo(const MCExpr *Expr, + static const PPCMCExpr *createLo(const MCExpr *Expr, bool isDarwin, MCContext &Ctx) { - return Create(VK_PPC_LO, Expr, isDarwin, Ctx); + return create(VK_PPC_LO, Expr, isDarwin, Ctx); } - static const PPCMCExpr *CreateHi(const MCExpr *Expr, + static const PPCMCExpr *createHi(const MCExpr *Expr, bool isDarwin, MCContext &Ctx) { - return Create(VK_PPC_HI, Expr, isDarwin, Ctx); + return create(VK_PPC_HI, Expr, isDarwin, Ctx); } - static const PPCMCExpr *CreateHa(const MCExpr *Expr, + static const PPCMCExpr *createHa(const MCExpr *Expr, bool isDarwin, MCContext &Ctx) { - return Create(VK_PPC_HA, Expr, isDarwin, Ctx); + return create(VK_PPC_HA, Expr, isDarwin, Ctx); } /// @} @@ -77,19 +77,19 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override; void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *FindAssociatedSection() const override { - return getSubExpr()->FindAssociatedSection(); + MCSection *findAssociatedSection() const override { + return getSubExpr()->findAssociatedSection(); } // There are no TLS PPCMCExprs at the moment. 
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} - bool EvaluateAsConstant(int64_t &Res) const; + bool evaluateAsConstant(int64_t &Res) const; static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 847437611a5f..1e8e8046669d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -16,14 +16,14 @@ #include "PPCMCAsmInfo.h" #include "PPCTargetStreamer.h" #include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -70,8 +70,8 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { - Triple TheTriple(TT); +static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TheTriple) { bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 || TheTriple.getArch() == Triple::ppc64le); @@ -132,8 +132,14 @@ public: void emitAbiVersion(int AbiVersion) override { OS << "\t.abiversion " << AbiVersion << '\n'; } - void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override { - OS << "\t.localentry\t" << *S << ", " << *LocalOffset << '\n'; + void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { + const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo(); + + OS << "\t.localentry\t"; + S->print(OS, MAI); + OS << ", "; + LocalOffset->print(OS, MAI); + OS << '\n'; } }; @@ -159,25 +165,21 @@ public: Flags |= (AbiVersion & ELF::EF_PPC64_ABI); MCA.setELFHeaderEFlags(Flags); } - void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override { + void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { MCAssembler &MCA = getStreamer().getAssembler(); - MCSymbolData &Data = getStreamer().getOrCreateSymbolData(S); int64_t Res; - if (!LocalOffset->EvaluateAsAbsolute(Res, MCA)) + if (!LocalOffset->evaluateAsAbsolute(Res, MCA)) report_fatal_error(".localentry expression must be absolute."); unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res); if (Res != ELF::decodePPC64LocalEntryOffset(Encoded)) report_fatal_error(".localentry expression cannot be encoded."); - // The "other" values are stored in the last 6 bits of the second byte. - // The traditional defines for STO values assume the full byte and thus - // the shift to pack it. - unsigned Other = MCELF::getOther(Data) << 2; + unsigned Other = S->getOther(); Other &= ~ELF::STO_PPC64_LOCAL_MASK; Other |= Encoded; - MCELF::setOther(Data, Other >> 2); + S->setOther(Other); // For GAS compatibility, unless we already saw a .abiversion directive, // set e_flags to indicate ELFv2 ABI. 
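The .localentry handling above packs the distance between a function's global and local entry points into the three STO_PPC64_LOCAL bits of the ELF st_other field. A rough standalone sketch of the decode side (the constants and the power-of-two mapping are paraphrased from the ELF.h helpers the hunk calls, not text from this patch):

#include <cstdint>
#include <cstdio>

// st_other layout for PPC64 ELFv2: bits [7:5] hold a 3-bit code.
const unsigned STO_PPC64_LOCAL_BIT = 5;
const unsigned STO_PPC64_LOCAL_MASK = 7u << STO_PPC64_LOCAL_BIT;

// Codes 0..7 decode to byte offsets 0, 0, 4, 8, 16, 32, 64, 128.
int64_t decodeLocalEntryOffset(unsigned StOther) {
  unsigned Val = (StOther & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT;
  return ((1 << Val) >> 2) << 2;
}

int main() {
  for (unsigned Code = 0; Code < 8; ++Code)
    printf("code %u -> offset %lld\n", Code,
           (long long)decodeLocalEntryOffset(Code << STO_PPC64_LOCAL_BIT));
  return 0;
}

A .localentry expression whose value is not one of those eight offsets fails the encode/decode round-trip, which is what the report_fatal_error check in the hunk above guards against.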
@@ -185,22 +187,18 @@ public: if ((Flags & ELF::EF_PPC64_ABI) == 0) MCA.setELFHeaderEFlags(Flags | 2); } - void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override { + void emitAssignment(MCSymbol *S, const MCExpr *Value) override { + auto *Symbol = cast<MCSymbolELF>(S); // When encoding an assignment to set symbol A to symbol B, also copy // the st_other bits encoding the local entry point offset. if (Value->getKind() != MCExpr::SymbolRef) return; - const MCSymbol &RhsSym = - static_cast<const MCSymbolRefExpr *>(Value)->getSymbol(); - MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym); - MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol); - // The "other" values are stored in the last 6 bits of the second byte. - // The traditional defines for STO values assume the full byte and thus - // the shift to pack it. - unsigned Other = MCELF::getOther(SymbolData) << 2; + const auto &RhsSym = cast<MCSymbolELF>( + static_cast<const MCSymbolRefExpr *>(Value)->getSymbol()); + unsigned Other = Symbol->getOther(); Other &= ~ELF::STO_PPC64_LOCAL_MASK; - Other |= (MCELF::getOther(Data) << 2) & ELF::STO_PPC64_LOCAL_MASK; - MCELF::setOther(SymbolData, Other >> 2); + Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK; + Symbol->setOther(Other); } }; @@ -217,7 +215,7 @@ public: void emitAbiVersion(int AbiVersion) override { llvm_unreachable("Unknown pseudo-op: .abiversion"); } - void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override { + void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { llvm_unreachable("Unknown pseudo-op: .localentry"); } }; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index 3c906d2a51e3..9d7289658f0f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -24,7 +24,7 @@ using namespace llvm; namespace { class PPCMachObjectWriter : public MCMachObjectTargetWriter { - bool RecordScatteredRelocation(MachObjectWriter *Writer, + bool recordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -38,10 +38,9 @@ class PPCMachObjectWriter : public MCMachObjectTargetWriter { public: PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) - : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, - /*UseAggressiveSymbolFolding=*/Is64Bit) {} + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {} - void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override { @@ -187,9 +186,9 @@ static uint32_t getFixupOffset(const MCAsmLayout &Layout, /// \return false if falling back to using non-scattered relocation, /// otherwise true for normal scattered relocation. 
-/// based on X86MachObjectWriter::RecordScatteredRelocation -/// and ARMMachObjectWriter::RecordScatteredRelocation -bool PPCMachObjectWriter::RecordScatteredRelocation( +/// based on X86MachObjectWriter::recordScatteredRelocation +/// and ARMMachObjectWriter::recordScatteredRelocation +bool PPCMachObjectWriter::recordScatteredRelocation( MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, unsigned Log2Size, uint64_t &FixedValue) { @@ -206,28 +205,26 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( // See <reloc.h>. const MCSymbol *A = &Target.getSymA()->getSymbol(); - const MCSymbolData *A_SD = &Asm.getSymbolData(*A); - if (!A_SD->getFragment()) + if (!A->getFragment()) report_fatal_error("symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Writer->getSymbolAddress(*A, Layout); - uint64_t SecAddr = - Writer->getSectionAddress(A_SD->getFragment()->getParent()); + uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); FixedValue += SecAddr; uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { - const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbol *SB = &B->getSymbol(); - if (!B_SD->getFragment()) + if (!SB->getFragment()) report_fatal_error("symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); // FIXME: is Type correct? see include/llvm/Support/MachO.h Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); - FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } // FIXME: does FixedValue get used?? @@ -253,7 +250,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( } // Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()? - // see PPCMCExpr::EvaluateAsRelocatableImpl() + // see PPCMCExpr::evaluateAsRelocatableImpl() uint32_t other_half = 0; switch (Type) { case MachO::PPC_RELOC_LO16_SECTDIFF: @@ -317,7 +314,7 @@ void PPCMachObjectWriter::RecordPPCRelocation( // Q: are branch targets ever scattered? RelocType != MachO::PPC_RELOC_BR24 && RelocType != MachO::PPC_RELOC_BR14) { - RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, Log2Size, FixedValue); return; } @@ -346,7 +343,7 @@ void PPCMachObjectWriter::RecordPPCRelocation( // Resolve constant variables. 
if (A->isVariable()) { int64_t Res; - if (A->getVariableValue()->EvaluateAsAbsolute( + if (A->getVariableValue()->evaluateAsAbsolute( Res, Layout, Writer->getSectionAddressMap())) { FixedValue = Res; return; diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 4f1c3c73e710..b42b0f9ef478 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -49,7 +49,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" @@ -181,14 +181,14 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, return; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_ConstantPoolIndex: O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getIndex(); return; case MachineOperand::MO_BlockAddress: - O << *GetBlockAddressSymbol(MO.getBlockAddress()); + GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: { // Computing the address of a global symbol, not calling it. @@ -222,8 +222,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, } else { SymToPrint = getSymbol(GV); } - - O << *SymToPrint; + + SymToPrint->print(O, MAI); printOffset(MO.getOffset(), O); return; @@ -422,11 +422,11 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, TM.getRelocationModel() == Reloc::PIC_) Kind = MCSymbolRefExpr::VK_PLT; const MCSymbolRefExpr *TlsRef = - MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); + MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, VK, OutContext); + const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, VK, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(Subtarget->isPPC64() ? PPC::BL8_NOP_TLS : PPC::BL_TLS) @@ -464,10 +464,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); const MCExpr *OffsExpr = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, + MCBinaryExpr::createSub(MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LOCAL, OutContext), - MCConstantExpr::Create(4, OutContext), + MCConstantExpr::create(4, OutContext), OutContext); // Emit the 'bl'. @@ -486,7 +486,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL) // FIXME: We would like an efficient form for this, so we don't have to do // a lot of extra uniquing. - .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); + .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); // Emit the label. 
OutStreamer->EmitLabel(PICBase); @@ -502,9 +502,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol(); TmpInst.setOpcode(PPC::LWZ); const MCExpr *Exp = - MCSymbolRefExpr::Create(PICOffset, MCSymbolRefExpr::VK_None, OutContext); + MCSymbolRefExpr::create(PICOffset, MCSymbolRefExpr::VK_None, OutContext); const MCExpr *PB = - MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), + MCSymbolRefExpr::create(MF->getPICBaseSymbol(), MCSymbolRefExpr::VK_None, OutContext); const MCOperand TR = TmpInst.getOperand(1); @@ -512,7 +512,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Step 1: lwz %Rt, .L$poff - .L$pb(%Ri) TmpInst.getOperand(1) = - MCOperand::createExpr(MCBinaryExpr::CreateSub(Exp, PB, OutContext)); + MCOperand::createExpr(MCBinaryExpr::createSub(Exp, PB, OutContext)); TmpInst.getOperand(0) = TR; TmpInst.getOperand(2) = PICR; EmitToStreamer(*OutStreamer, TmpInst); @@ -547,19 +547,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (PL == PICLevel::Small) { const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_GOT, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_GOT, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); } else { MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = - MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None, + MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None, OutContext); const MCExpr *PB = - MCSymbolRefExpr::Create(OutContext.getOrCreateSymbol(Twine(".LTOC")), + MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")), OutContext); - Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext); + Exp = MCBinaryExpr::createSub(Exp, PB, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); } EmitToStreamer(*OutStreamer, TmpInst); @@ -592,7 +592,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = - MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, + MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -639,7 +639,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA, OutContext); TmpInst.getOperand(2) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -681,7 +681,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -715,7 +715,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(2) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -729,7 +729,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); 
const MCExpr *SymGotTprel = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) @@ -748,7 +748,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -763,10 +763,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL) // FIXME: We would like an efficient form for this, so we don't have to do // a lot of extra uniquing. - .addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext))); + .addExpr(MCSymbolRefExpr::create(NextInstr, OutContext))); const MCExpr *OffsExpr = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext), - MCSymbolRefExpr::Create(GOTRef, OutContext), + MCBinaryExpr::createSub(MCSymbolRefExpr::create(GOTSymbol, OutContext), + MCSymbolRefExpr::create(GOTRef, OutContext), OutContext); OutStreamer->EmitLabel(GOTRef); OutStreamer->EmitValue(OffsExpr, 4); @@ -786,10 +786,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::PPC32GOT: { MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); const MCExpr *SymGotTlsL = - MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, + MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, OutContext); const MCExpr *SymGotTlsHA = - MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, + MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI) .addReg(MI->getOperand(0).getReg()) @@ -808,7 +808,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsGD = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) @@ -825,7 +825,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create( + const MCExpr *SymGotTlsGD = MCSymbolRefExpr::create( MOSymbol, Subtarget->isPPC64() ? 
MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO : MCSymbolRefExpr::VK_PPC_GOT_TLSGD, OutContext); @@ -853,7 +853,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsLD = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) @@ -870,7 +870,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create( + const MCExpr *SymGotTlsLD = MCSymbolRefExpr::create( MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO : MCSymbolRefExpr::VK_PPC_GOT_TLSLD, OutContext); @@ -900,7 +900,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymDtprel = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA, OutContext); EmitToStreamer( *OutStreamer, @@ -920,7 +920,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymDtprel = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, + MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI) @@ -1012,8 +1012,8 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { // The GOT pointer points to the middle of the GOT, in order to reference the // entire 64kB range. 0x8000 is the midpoint. const MCExpr *tocExpr = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(CurrentPos, OutContext), - MCConstantExpr::Create(0x8000, OutContext), + MCBinaryExpr::createAdd(MCSymbolRefExpr::create(CurrentPos, OutContext), + MCConstantExpr::create(0x8000, OutContext), OutContext); OutStreamer->EmitAssignment(TOCSym, tocExpr); @@ -1036,10 +1036,10 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutStreamer->EmitLabel(RelocSymbol); const MCExpr *OffsExpr = - MCBinaryExpr::CreateSub( - MCSymbolRefExpr::Create(OutContext.getOrCreateSymbol(Twine(".LTOC")), + MCBinaryExpr::createSub( + MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")), OutContext), - MCSymbolRefExpr::Create(PICBase, OutContext), + MCSymbolRefExpr::create(PICBase, OutContext), OutContext); OutStreamer->EmitValue(OffsExpr, 4); OutStreamer->EmitLabel(CurrentFnSym); @@ -1062,12 +1062,12 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { MCSymbol *Symbol1 = CurrentFnSymForSize; // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function // entry point. - OutStreamer->EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), + OutStreamer->EmitValue(MCSymbolRefExpr::create(Symbol1, OutContext), 8 /*size*/); MCSymbol *Symbol2 = OutContext.getOrCreateSymbol(StringRef(".TOC.")); // Generates a R_PPC64_TOC relocation for TOC base insertion. 
OutStreamer->EmitValue( - MCSymbolRefExpr::Create(Symbol2, MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext), + MCSymbolRefExpr::create(Symbol2, MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext), 8/*size*/); // Emit a null environment pointer. OutStreamer->EmitIntValue(0, 8 /* size */); @@ -1133,22 +1133,22 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() { MCSymbol *GlobalEntryLabel = OutContext.createTempSymbol(); OutStreamer->EmitLabel(GlobalEntryLabel); const MCSymbolRefExpr *GlobalEntryLabelExp = - MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext); + MCSymbolRefExpr::create(GlobalEntryLabel, OutContext); MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC.")); const MCExpr *TOCDeltaExpr = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext), + MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext), GlobalEntryLabelExp, OutContext); const MCExpr *TOCDeltaHi = - PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext); + PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS) .addReg(PPC::X2) .addReg(PPC::X12) .addExpr(TOCDeltaHi)); const MCExpr *TOCDeltaLo = - PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext); + PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI) .addReg(PPC::X2) .addReg(PPC::X2) @@ -1157,16 +1157,16 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() { MCSymbol *LocalEntryLabel = OutContext.createTempSymbol(); OutStreamer->EmitLabel(LocalEntryLabel); const MCSymbolRefExpr *LocalEntryLabelExp = - MCSymbolRefExpr::Create(LocalEntryLabel, OutContext); + MCSymbolRefExpr::create(LocalEntryLabel, OutContext); const MCExpr *LocalOffsetExp = - MCBinaryExpr::CreateSub(LocalEntryLabelExp, + MCBinaryExpr::createSub(LocalEntryLabelExp, GlobalEntryLabelExp, OutContext); PPCTargetStreamer *TS = static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer()); if (TS) - TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp); + TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp); } } @@ -1305,10 +1305,10 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { OutStreamer->EmitLabel(Stub); OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext); - const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext); + const MCExpr *Anon = MCSymbolRefExpr::create(AnonSymbol, OutContext); + const MCExpr *LazyPtrExpr = MCSymbolRefExpr::create(LazyPtr, OutContext); const MCExpr *Sub = - MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext); + MCBinaryExpr::createSub(LazyPtrExpr, Anon, OutContext); // mflr r0 EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); @@ -1318,7 +1318,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // mflr r11 EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); // addis r11, r11, ha16(LazyPtr - AnonSymbol) - const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, true, OutContext); + const MCExpr *SubHa16 = PPCMCExpr::createHa(Sub, true, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS) .addReg(PPC::R11) .addReg(PPC::R11) @@ -1328,7 +1328,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // ldu r12, lo16(LazyPtr - AnonSymbol)(r11) // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11) - const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, true, OutContext); + const MCExpr *SubLo16 = PPCMCExpr::createLo(Sub, 
true, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(SubLo16).addExpr(SubLo16) @@ -1364,7 +1364,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { MCSymbol *Stub = Stubs[i].first; MCSymbol *RawSym = Stubs[i].second.getPointer(); MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); - const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext); + const MCExpr *LazyPtrExpr = MCSymbolRefExpr::create(LazyPtr, OutContext); OutStreamer->SwitchSection(StubSection); EmitAlignment(4); @@ -1373,7 +1373,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // lis r11, ha16(LazyPtr) const MCExpr *LazyPtrHa16 = - PPCMCExpr::CreateHa(LazyPtrExpr, true, OutContext); + PPCMCExpr::createHa(LazyPtrExpr, true, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LIS) .addReg(PPC::R11) .addExpr(LazyPtrHa16)); @@ -1381,7 +1381,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // ldu r12, lo16(LazyPtr)(r11) // lwzu r12, lo16(LazyPtr)(r11) const MCExpr *LazyPtrLo16 = - PPCMCExpr::CreateLo(LazyPtrExpr, true, OutContext); + PPCMCExpr::createLo(LazyPtrExpr, true, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16) @@ -1465,7 +1465,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { // need to be indirect and pc-rel. We accomplish this by using NLPs. // However, sometimes the types are local to the file. So we need to // fill in the value for the NLP in those cases. - OutStreamer->EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), + OutStreamer->EmitValue(MCSymbolRefExpr::create(MCSym.getPointer(), OutContext), isPPC64 ? 8 : 4/*size*/); } @@ -1484,7 +1484,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { OutStreamer->EmitLabel(Stubs[i].first); // .long _foo OutStreamer->EmitValue(MCSymbolRefExpr:: - Create(Stubs[i].second.getPointer(), + create(Stubs[i].second.getPointer(), OutContext), isPPC64 ? 8 : 4/*size*/); } diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 0b8e23c4ebf8..a561d5b1190a 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -148,6 +148,9 @@ class PPCFastISel final : public FastISel { bool isVSFRCRegister(unsigned Register) const { return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID; } + bool isVSSRCRegister(unsigned Register) const { + return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID; + } bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt, unsigned DestReg); bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, @@ -503,8 +506,11 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // If this is a potential VSX load with an offset of 0, a VSX indexed load can // be used. + bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg); bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg); - if (IsVSFRC && (Opc == PPC::LFD) && + bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS; + bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD; + if ((Is32VSXLoad || Is64VSXLoad) && (Addr.BaseType != Address::FrameIndexBase) && UseOffset && (Addr.Offset == 0)) { UseOffset = false; @@ -518,7 +524,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // into a RegBase. if (Addr.BaseType == Address::FrameIndexBase) { // VSX only provides an indexed load. 
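// Sketch of the remapping these FastISel hunks install, abbreviated from
// the opcode switch further down: a PPC::LFS whose result register is in
// the single-precision VSX class (VSSRC) becomes PPC::LXSSPX, and a
// PPC::LFD targeting VSFRC becomes PPC::LXSDX. VSX exposes only indexed
// (X-form) loads, so the frame-index and reg+imm paths below must return
// false and fall back to SelectionDAG when a VSX register is involved.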
- if (IsVSFRC && Opc == PPC::LFD) return false; + if (Is32VSXLoad || Is64VSXLoad) return false; MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( @@ -532,7 +538,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // Base reg with offset in range. } else if (UseOffset) { // VSX only provides an indexed load. - if (IsVSFRC && Opc == PPC::LFD) return false; + if (Is32VSXLoad || Is64VSXLoad) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addImm(Addr.Offset).addReg(Addr.Base.Reg); @@ -555,7 +561,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, case PPC::LWA: Opc = PPC::LWAX; break; case PPC::LWA_32: Opc = PPC::LWAX_32; break; case PPC::LD: Opc = PPC::LDX; break; - case PPC::LFS: Opc = PPC::LFSX; break; + case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break; case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break; } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) @@ -636,9 +642,12 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { // If this is a potential VSX store with an offset of 0, a VSX indexed store // can be used. + bool IsVSSRC = isVSSRCRegister(SrcReg); bool IsVSFRC = isVSFRCRegister(SrcReg); - if (IsVSFRC && (Opc == PPC::STFD) && - (Addr.BaseType != Address::FrameIndexBase) && UseOffset && + bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS; + bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD; + if ((Is32VSXStore || Is64VSXStore) && + (Addr.BaseType != Address::FrameIndexBase) && UseOffset && (Addr.Offset == 0)) { UseOffset = false; } @@ -648,7 +657,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { // into a RegBase. if (Addr.BaseType == Address::FrameIndexBase) { // VSX only provides an indexed store. - if (IsVSFRC && Opc == PPC::STFD) return false; + if (Is32VSXStore || Is64VSXStore) return false; MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( @@ -665,7 +674,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { // Base reg with offset in range. } else if (UseOffset) { // VSX only provides an indexed store. - if (IsVSFRC && Opc == PPC::STFD) return false; + if (Is32VSXStore || Is64VSXStore) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg); @@ -684,7 +693,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { case PPC::STH8: Opc = PPC::STHX8; break; case PPC::STW8: Opc = PPC::STWX8; break; case PPC::STD: Opc = PPC::STDX; break; - case PPC::STFS: Opc = PPC::STFSX; break; + case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break; case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break; } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index bb9315e9520e..2600ee5db179 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10825,7 +10825,8 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // PPC does not allow r+i addressing modes for vectors! 
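// The hook now also receives the address space of the access (unsigned
// AS). PPC does not use it, but targets with segmented memory can key
// legality on it; a hypothetical caller-side sketch:
//   unsigned AS = PtrTy->getPointerAddressSpace();
//   if (TLI->isLegalAddressingMode(AM, AccessTy, AS)) { /* fold offset */ }
// The SI backend later in this patch uses exactly this to give DS, MUBUF
// and flat accesses different immediate-offset rules.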
if (Ty->isVectorTy() && AM.BaseOffs != 0) return false; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index c93de430fd05..7fd3f9c3de3d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -556,7 +556,8 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 9685bac2aebb..d08b80871f3e 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -1078,6 +1078,82 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB), "xssubsp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>; + + // FMA Instructions + let BaseName = "XSMADDASP" in { + let isCommutable = 1 in + def XSMADDASP : XX3Form<60, 1, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSMADDMSP : XX3Form<60, 9, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSMSUBASP" in { + let isCommutable = 1 in + def XSMSUBASP : XX3Form<60, 17, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fma f32:$XA, f32:$XB, + (fneg f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSMSUBMSP : XX3Form<60, 25, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSNMADDASP" in { + let isCommutable = 1 in + def XSNMADDASP : XX3Form<60, 129, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB, + f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSNMADDMSP : XX3Form<60, 137, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSNMSUBASP" in { + let isCommutable = 1 in + def XSNMSUBASP : XX3Form<60, 145, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB, + (fneg f32:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSNMSUBMSP : XX3Form<60, 153, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } } // AddedComplexity = 400 } // HasP8Vector diff --git 
a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index f1e28651aea2..05cb6e11db67 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -142,28 +142,28 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin) RefKind = MCSymbolRefExpr::VK_PLT; - const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx); + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx); if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(MO.getOffset(), Ctx), + Expr = MCBinaryExpr::createAdd(Expr, + MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); // Subtract off the PIC base if required. if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) { const MachineFunction *MF = MO.getParent()->getParent()->getParent(); - const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx); - Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx); + const MCExpr *PB = MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); + Expr = MCBinaryExpr::createSub(Expr, PB, Ctx); } // Add ha16() / lo16() markers if required. switch (access) { case PPCII::MO_LO: - Expr = PPCMCExpr::CreateLo(Expr, isDarwin, Ctx); + Expr = PPCMCExpr::createLo(Expr, isDarwin, Ctx); break; case PPCII::MO_HA: - Expr = PPCMCExpr::CreateHa(Expr, isDarwin, Ctx); + Expr = PPCMCExpr::createHa(Expr, isDarwin, Ctx); break; } @@ -193,7 +193,7 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create( + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), AP.OutContext)); break; case MachineOperand::MO_GlobalAddress: diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp index 9ad134070082..9ee5db938b67 100644 --- a/lib/Target/PowerPC/PPCTargetObjectFile.cpp +++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp @@ -55,9 +55,9 @@ MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal( const MCExpr *PPC64LinuxTargetObjectFile:: getDebugThreadLocalSymbol(const MCSymbol *Sym) const { const MCExpr *Expr = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_PPC_DTPREL, getContext()); - return MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(0x8000, getContext()), + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_PPC_DTPREL, getContext()); + return MCBinaryExpr::createAdd(Expr, + MCConstantExpr::create(0x8000, getContext()), getContext()); } diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h index 8aaf5e188907..dbe7617d3542 100644 --- a/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/lib/Target/PowerPC/PPCTargetStreamer.h @@ -20,7 +20,7 @@ public: virtual void emitTCEntry(const MCSymbol &S) = 0; virtual void emitMachine(StringRef CPU) = 0; virtual void emitAbiVersion(int AbiVersion) = 0; - virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) = 0; + virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0; }; } diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 9b3606372035..0a05d25189b0 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -105,7 +105,7 @@ namespace ShaderType { /// a separate piece of memory that is unique from other /// memory locations. 
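// Note on the enum below: it gains an explicit "unsigned" underlying type
// together with a new UNKNOWN_ADDRESS_SPACE = ~0u sentinel, for callers
// (such as the reworked isLegalAddressingMode) that must be conservative
// when the address space of an access cannot be determined. Fixing the
// underlying type keeps the ~0u sentinel well-defined when it is compared
// against ordinary unsigned address-space numbers.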
namespace AMDGPUAS { -enum AddressSpaces { +enum AddressSpaces : unsigned { PRIVATE_ADDRESS = 0, ///< Address space for private memory. GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). CONSTANT_ADDRESS = 2, ///< Address space for constant memory @@ -137,7 +137,10 @@ enum AddressSpaces { CONSTANT_BUFFER_14 = 22, CONSTANT_BUFFER_15 = 23, ADDRESS_NONE = 24, ///< Address space for unknown memory. - LAST_ADDRESS = ADDRESS_NONE + LAST_ADDRESS = ADDRESS_NONE, + + // Some places use this if the address space can't be determined. + UNKNOWN_ADDRESS_SPACE = ~0u }; } // namespace AMDGPUAS diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index d00ae78c99b0..d56838ec2019 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -257,9 +257,22 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); + setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand); + setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; for (MVT VT : ScalarIntVTs) { @@ -301,6 +314,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::SMIN, MVT::i32, Legal); + setOperationAction(ISD::UMIN, MVT::i32, Legal); + setOperationAction(ISD::SMAX, MVT::i32, Legal); + setOperationAction(ISD::UMAX, MVT::i32, Legal); + if (!Subtarget->hasFFBH()) setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); @@ -962,17 +980,17 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_imax: - return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::SMAX, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_umax: - return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::UMAX, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_imin: - return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::SMIN, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_umin: - return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::UMIN, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_umul24: return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, @@ -1050,7 +1068,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op.getOperand(1)); - return 
DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); + return DAG.getNode(ISD::SMAX, DL, VT, Neg, Op.getOperand(1)); } /// Linear Interpolation @@ -1149,7 +1167,7 @@ SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL, return SDValue(); } -/// \brief Generate Min/Max node +// FIXME: Remove this when combines added to DAGCombiner. SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL, EVT VT, SDValue LHS, @@ -1165,22 +1183,22 @@ SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL, switch (CCOpcode) { case ISD::SETULE: case ISD::SETULT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::UMIN : AMDGPUISD::UMAX; + unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETLE: case ISD::SETLT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::SMIN : AMDGPUISD::SMAX; + unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETGT: case ISD::SETGE: { - unsigned Opc = (LHS == True) ? AMDGPUISD::SMAX : AMDGPUISD::SMIN; + unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN; return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETUGE: case ISD::SETUGT: { - unsigned Opc = (LHS == True) ? AMDGPUISD::UMAX : AMDGPUISD::UMIN; + unsigned Opc = (LHS == True) ? ISD::UMAX : ISD::UMIN; return DAG.getNode(Opc, DL, VT, LHS, RHS); } default: @@ -2644,11 +2662,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(COS_HW) NODE_NAME_CASE(SIN_HW) NODE_NAME_CASE(FMAX_LEGACY) - NODE_NAME_CASE(SMAX) - NODE_NAME_CASE(UMAX) NODE_NAME_CASE(FMIN_LEGACY) - NODE_NAME_CASE(SMIN) - NODE_NAME_CASE(UMIN) NODE_NAME_CASE(FMAX3) NODE_NAME_CASE(SMAX3) NODE_NAME_CASE(UMAX3) @@ -2794,14 +2808,6 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( break; } - case AMDGPUISD::SMAX: - case AMDGPUISD::UMAX: - case AMDGPUISD::SMIN: - case AMDGPUISD::UMIN: - computeKnownBitsForMinMax(Op.getOperand(0), Op.getOperand(1), - KnownZero, KnownOne, DAG, Depth); - break; - case AMDGPUISD::CARRY: case AMDGPUISD::BORROW: { KnownZero = APInt::getHighBitsSet(32, 31); diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index c9f198129efc..fbb7d3c88437 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -228,11 +228,7 @@ enum NodeType : unsigned { COS_HW, SIN_HW, FMAX_LEGACY, - SMAX, - UMAX, FMIN_LEGACY, - SMIN, - UMIN, FMAX3, SMAX3, UMAX3, diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index f0f10ca59723..64e295f1144c 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -152,17 +152,15 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const return true; } -MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - int FrameIndex) const { +MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const { // TODO: Implement this function return nullptr; } -MachineInstr * -AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - ArrayRef<unsigned> Ops, - MachineInstr *LoadMI) const { +MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const { // TODO: Implement this function 
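// Both foldMemoryOperandImpl overloads now take a
// MachineBasicBlock::iterator InsertPt, so the generic folding code, not
// each target, decides where the folded instruction is materialized.
// AMDGPU still declines to fold (returns nullptr below); only the
// override signatures change in this hunk.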
return nullptr; } diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index 07042b59be7b..8fd27a17638b 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -87,9 +87,11 @@ public: protected: MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const override; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const override; public: diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 790f34cea8cd..b413897d9d23 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -94,16 +94,6 @@ def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp, [] >; -// out = min(a, b) a and b are signed ints -def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp, - [SDNPCommutative, SDNPAssociative] ->; - -// out = min(a, b) a and b are unsigned ints -def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp, - [SDNPCommutative, SDNPAssociative] ->; - // FIXME: TableGen doesn't like commutative instructions with more // than 2 operands. // out = max(a, b, c) a, b and c are floats diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index 9565e3fd5fa6..20831460b933 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -64,25 +64,25 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::createReg(MO.getReg()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create( + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), Ctx)); break; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName())); - MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(Sym, Ctx)); + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx)); break; } case MachineOperand::MO_TargetIndex: { assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START); MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); - const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); MCOp = MCOperand::createExpr(Expr); break; } case MachineOperand::MO_ExternalSymbol: { MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName())); - const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); MCOp = MCOperand::createExpr(Expr); break; } diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index b262cdf57712..a5a901c739d4 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -272,7 +272,7 @@ public: } bool enableSubRegLiveness() const override { - return false; + return true; } }; diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp index 19bffd575117..95025a6e29f1 100644 --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp @@ -1084,7 +1084,7 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { case AsmToken::Identifier: Operands.push_back(AMDGPUOperand::CreateExpr( - 
MCSymbolRefExpr::Create(getContext().getOrCreateSymbol( + MCSymbolRefExpr::create(getContext().getOrCreateSymbol( Parser.getTok().getString()), getContext()), S)); Parser.Lex(); return MatchOperand_Success; diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 279c3eb1912f..f70676943bb3 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -337,7 +337,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } else if (Op.isExpr()) { const MCExpr *Exp = Op.getExpr(); - Exp->print(O); + Exp->print(O, &MAI); } else { llvm_unreachable("unknown operand type in printOperand"); } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index 2605ca52dfde..3713223697ed 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -25,18 +25,18 @@ namespace { class AMDGPUMCObjectWriter : public MCObjectWriter { public: AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {} - void ExecutePostLayoutBinding(MCAssembler &Asm, + void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override { //XXX: Implement if necessary. } - void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, + void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) override { assert(!"Not implemented"); } - void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; }; @@ -64,7 +64,7 @@ public: } //End anonymous namespace -void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm, +void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) { Asm.writeSectionData(&*I, Layout); diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 19d89fb27caa..028a86dfc7ad 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -11,7 +11,7 @@ #include "AMDGPUMCAsmInfo.h" using namespace llvm; -AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfoELF() { +AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { HasSingleParameterDotFile = false; //===------------------------------------------------------------------===// MaxInstLength = 16; diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h index 8f75c76c4257..a5bac51e356f 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h @@ -17,7 +17,7 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { -class StringRef; +class Triple; // If you need to create another MCAsmInfo class, which inherits from MCAsmInfo, // you will need to make sure your new class sets PrivateGlobalPrefix to @@ -26,7 +26,7 @@ class StringRef; // with 'L' as a local symbol. 
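// The constructor switch from "StringRef &TT" to "const Triple &TT"
// recurs across the targets in this import (AMDGPU, Sparc, SystemZ): the
// MCAsmInfo creation hooks now pass a parsed Triple, so a target can
// query it structurally instead of re-parsing a string, as the Sparc
// hunk further down does:
//   bool isV9 = (TheTriple.getArch() == Triple::sparcv9);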
class AMDGPUMCAsmInfo : public MCAsmInfoELF { public: - explicit AMDGPUMCAsmInfo(StringRef &TT); + explicit AMDGPUMCAsmInfo(const Triple &TT); }; } // namespace llvm #endif diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index a809564e3be0..e683498d52a5 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -148,15 +149,11 @@ void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const { } void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const { - for (unsigned i = 0; i < 4; i++) { - OS.write((uint8_t) ((Value >> (8 * i)) & 0xff)); - } + support::endian::Writer<support::little>(OS).write(Value); } void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const { - for (unsigned i = 0; i < 8; i++) { - EmitByte((Value >> (8 * i)) & 0xff, OS); - } + support::endian::Writer<support::little>(OS).write(Value); } unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const { diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 7126c82c0331..7beed092b3f7 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -781,10 +781,10 @@ def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>; def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>; def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>; def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>; -def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>; -def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>; -def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>; -def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>; +def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", smax>; +def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", smin>; +def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", umax>; +def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", umin>; def SETE_INT : R600_2OP < 0x3A, "SETE_INT", diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 52bf2aeb87de..12d08cf4c7f5 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -155,7 +155,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); - setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); @@ -211,6 +210,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FMINNUM); setTargetDAGCombine(ISD::FMAXNUM); + setTargetDAGCombine(ISD::SMIN); + setTargetDAGCombine(ISD::SMAX); + setTargetDAGCombine(ISD::UMIN); + setTargetDAGCombine(ISD::UMAX); setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::AND); @@ -251,47 +254,83 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &, return false; } -// FIXME: This really needs an address space argument. The immediate offset -// size is different for different sets of memory instruction sets. 
- -// The single offset DS instructions have a 16-bit unsigned byte offset. -// -// MUBUF / MTBUF have a 12-bit unsigned byte offset, and additionally can do r + -// r + i with addr64. 32-bit has more addressing mode options. Depending on the -// resource constant, it can also do (i64 r0) + (i32 r1) * (i14 i). -// -// SMRD instructions have an 8-bit, dword offset. -// bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, unsigned AS) const { // No global is ever allowed as a base. if (AM.BaseGV) return false; - // Allow a 16-bit unsigned immediate field, since this is what DS instructions - // use. - if (!isUInt<16>(AM.BaseOffs)) - return false; + switch (AS) { + case AMDGPUAS::GLOBAL_ADDRESS: + case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions? + case AMDGPUAS::PRIVATE_ADDRESS: + case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: { + // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and + // additionally can do r + r + i with addr64. 32-bit has more addressing + // mode options. Depending on the resource constant, it can also do + // (i64 r0) + (i32 r1) * (i14 i). + // + // SMRD instructions have an 8-bit, dword offset. + // + // Assume nonunifom access, since the address space isn't enough to know + // what instruction we will use, and since we don't know if this is a load + // or store and scalar stores are only available on VI. + // + // We also know if we are doing an extload, we can't do a scalar load. + // + // Private arrays end up using a scratch buffer most of the time, so also + // assume those use MUBUF instructions. Scratch loads / stores are currently + // implemented as mubuf instructions with offen bit set, so slightly + // different than the normal addr64. + if (!isUInt<12>(AM.BaseOffs)) + return false; - // Only support r+r, - switch (AM.Scale) { - case 0: // "r+i" or just "i", depending on HasBaseReg. - break; - case 1: - if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. + // FIXME: Since we can split immediate into soffset and immediate offset, + // would it make sense to allow any immediate? + + switch (AM.Scale) { + case 0: // r + i or just i, depending on HasBaseReg. + return true; + case 1: + return true; // We have r + r or r + i. + case 2: + if (AM.HasBaseReg) { + // Reject 2 * r + r. + return false; + } + + // Allow 2 * r as r + r + // Or 2 * r + i is allowed as r + r + i. + return true; + default: // Don't allow n * r return false; - // Otherwise we have r+r or r+i. - break; - case 2: - if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. + } + } + case AMDGPUAS::LOCAL_ADDRESS: + case AMDGPUAS::REGION_ADDRESS: { + // Basic, single offset DS instructions allow a 16-bit unsigned immediate + // field. + // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have + // an 8-bit dword offset but we don't know the alignment here. + if (!isUInt<16>(AM.BaseOffs)) return false; - // Allow 2*r as r+r. - break; - default: // Don't allow n * r + + if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg. + return true; + + if (AM.Scale == 1 && AM.HasBaseReg) + return true; + return false; } - - return true; + case AMDGPUAS::FLAT_ADDRESS: { + // Flat instructions do not have offsets, and only have the register + // address. 
+ return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1); + } + default: + llvm_unreachable("unhandled address space"); + } } bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, @@ -368,6 +407,12 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return TII->isInlineConstant(Imm); } +static EVT toIntegerVT(EVT VT) { + if (VT.isVector()) + return VT.changeVectorElementTypeToInteger(); + return MVT::getIntegerVT(VT.getSizeInBits()); +} + SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc SL, SDValue Chain, unsigned Offset, bool Signed) const { @@ -380,20 +425,42 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, Type *Ty = VT.getTypeForEVT(*DAG.getContext()); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + MVT PtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS); PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); - SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, - MRI.getLiveInVirtReg(InputPtrReg), MVT::i64); - SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, BasePtr, - DAG.getConstant(Offset, SL, MVT::i64)); + SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, + MRI.getLiveInVirtReg(InputPtrReg), PtrVT); + SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, + DAG.getConstant(Offset, SL, PtrVT)); SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS)); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); - return DAG.getLoad(ISD::UNINDEXED, Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, + unsigned Align = DL->getABITypeAlignment(Ty); + + if (VT != MemVT && VT.isFloatingPoint()) { + // Do an integer load and convert. + // FIXME: This is mostly because load legalization after type legalization + // doesn't handle FP extloads. + assert(VT.getScalarType() == MVT::f32 && + MemVT.getScalarType() == MVT::f16); + + EVT IVT = toIntegerVT(VT); + EVT MemIVT = toIntegerVT(MemVT); + SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD, + IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT, + false, // isVolatile + true, // isNonTemporal + true, // isInvariant + Align); // Alignment + return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load); + } + + ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + return DAG.getLoad(ISD::UNINDEXED, ExtTy, VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT, false, // isVolatile true, // isNonTemporal true, // isInvariant - DL->getABITypeAlignment(Ty)); // Alignment + Align); // Alignment } SDValue SITargetLowering::LowerFormalArguments( @@ -1570,15 +1637,15 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) { switch (Opc) { case ISD::FMAXNUM: return AMDGPUISD::FMAX3; - case AMDGPUISD::SMAX: + case ISD::SMAX: return AMDGPUISD::SMAX3; - case AMDGPUISD::UMAX: + case ISD::UMAX: return AMDGPUISD::UMAX3; case ISD::FMINNUM: return AMDGPUISD::FMIN3; - case AMDGPUISD::SMIN: + case ISD::SMIN: return AMDGPUISD::SMIN3; - case AMDGPUISD::UMIN: + case ISD::UMIN: return AMDGPUISD::UMIN3; default: llvm_unreachable("Not a min/max opcode"); @@ -1664,10 +1731,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, return performSetCCCombine(N, DCI); case ISD::FMAXNUM: // TODO: What about fmax_legacy? 
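// The AMDGPU-private SMIN/SMAX/UMIN/UMAX node kinds are removed by this
// import: i32 min/max are marked Legal as the generic ISD opcodes, the
// TableGen patterns move to the target-independent smin/smax/umin/umax
// fragments, and the min3/max3 mapping and the combines below now match
// ISD::{S,U}{MIN,MAX}. computeKnownBitsForTargetNode and the node-name
// table drop the custom entries accordingly.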
case ISD::FMINNUM: - case AMDGPUISD::SMAX: - case AMDGPUISD::SMIN: - case AMDGPUISD::UMAX: - case AMDGPUISD::UMIN: { + case ISD::SMAX: + case ISD::SMIN: + case ISD::UMAX: + case ISD::UMIN: { if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG && N->getValueType(0) != MVT::f64 && getTargetMachine().getOptLevel() > CodeGenOpt::None) diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index a95354c38816..a956b013bdb1 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -63,7 +63,7 @@ public: EVT /*VT*/) const override; bool isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const override; + Type *Ty, unsigned AS) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 839c2e9ecdd2..2f39074802b7 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -224,16 +224,16 @@ defm S_SUBB_U32 : SOP2_32 <sop2<0x05>, "s_subb_u32", } // End Uses = [SCC] defm S_MIN_I32 : SOP2_32 <sop2<0x06>, "s_min_i32", - [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))] + [(set i32:$dst, (smin i32:$src0, i32:$src1))] >; defm S_MIN_U32 : SOP2_32 <sop2<0x07>, "s_min_u32", - [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))] + [(set i32:$dst, (umin i32:$src0, i32:$src1))] >; defm S_MAX_I32 : SOP2_32 <sop2<0x08>, "s_max_i32", - [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))] + [(set i32:$dst, (smax i32:$src0, i32:$src1))] >; defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32", - [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))] + [(set i32:$dst, (umax i32:$src0, i32:$src1))] >; } // End Defs = [SCC] diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 388cb65c99cb..6b3b51afb4bd 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -408,7 +408,7 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc, uint64_t ImmValue = IsImm ? 
MCValOp.getImm() : 0; const MCExpr *ValExpr; if (IsImm) - ValExpr = MCConstantExpr::Create(ImmValue, getContext()); + ValExpr = MCConstantExpr::create(ImmValue, getContext()); else ValExpr = MCValOp.getExpr(); @@ -417,7 +417,7 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc, if (!IsImm || (ImmValue & ~0x1fff)) { MCInst TmpInst; const MCExpr *Expr = - SparcMCExpr::Create(SparcMCExpr::VK_Sparc_HI, ValExpr, getContext()); + SparcMCExpr::create(SparcMCExpr::VK_Sparc_HI, ValExpr, getContext()); TmpInst.setLoc(IDLoc); TmpInst.setOpcode(SP::SETHIi); TmpInst.addOperand(MCRegOp); @@ -429,7 +429,7 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc, if (!IsImm || ((ImmValue & 0x1fff) != 0 || ImmValue == 0)) { MCInst TmpInst; const MCExpr *Expr = - SparcMCExpr::Create(SparcMCExpr::VK_Sparc_LO, ValExpr, getContext()); + SparcMCExpr::create(SparcMCExpr::VK_Sparc_LO, ValExpr, getContext()); TmpInst.setLoc(IDLoc); TmpInst.setOpcode(SP::ORri); TmpInst.addOperand(MCRegOp); @@ -774,11 +774,11 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op, E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); - const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + const MCExpr *Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); if (isCall && getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_) - Res = SparcMCExpr::Create(SparcMCExpr::VK_Sparc_WPLT30, Res, + Res = SparcMCExpr::create(SparcMCExpr::VK_Sparc_WPLT30, Res, getContext()); Op = SparcOperand::CreateImm(Res, S, E); } @@ -1010,7 +1010,7 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal, break; } - EVal = SparcMCExpr::Create(VK, subExpr, getContext()); + EVal = SparcMCExpr::create(VK, subExpr, getContext()); return true; } diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp index bac2617b0f3e..5d714fe4da92 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp @@ -120,7 +120,7 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum, } assert(MO.isExpr() && "Unknown operand kind in printOperand"); - MO.getExpr()->print(O); + MO.getExpr()->print(O, &MAI); } void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum, diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 3792a596a6b8..9388527004f5 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -260,7 +260,7 @@ namespace { uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(0x01000000); + OW->write32(0x01000000); return true; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 124cb3b4b98b..280c6d7937b2 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -20,8 +20,7 @@ using namespace llvm; void SparcELFMCAsmInfo::anchor() {} -SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) { - Triple TheTriple(TT); +SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Triple &TheTriple) { bool isV9 = (TheTriple.getArch() == Triple::sparcv9); IsLittleEndian = (TheTriple.getArch() == Triple::sparcel); @@ -51,8 +50,8 @@ SparcELFMCAsmInfo::getExprForPersonalitySymbol(const MCSymbol *Sym, MCStreamer &Streamer) 
const { if (Encoding & dwarf::DW_EH_PE_pcrel) { MCContext &Ctx = Streamer.getContext(); - return SparcMCExpr::Create(SparcMCExpr::VK_Sparc_R_DISP32, - MCSymbolRefExpr::Create(Sym, Ctx), Ctx); + return SparcMCExpr::create(SparcMCExpr::VK_Sparc_R_DISP32, + MCSymbolRefExpr::create(Sym, Ctx), Ctx); } return MCAsmInfo::getExprForPersonalitySymbol(Sym, Encoding, Streamer); @@ -64,8 +63,8 @@ SparcELFMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, MCStreamer &Streamer) const { if (Encoding & dwarf::DW_EH_PE_pcrel) { MCContext &Ctx = Streamer.getContext(); - return SparcMCExpr::Create(SparcMCExpr::VK_Sparc_R_DISP32, - MCSymbolRefExpr::Create(Sym, Ctx), Ctx); + return SparcMCExpr::create(SparcMCExpr::VK_Sparc_R_DISP32, + MCSymbolRefExpr::create(Sym, Ctx), Ctx); } return MCAsmInfo::getExprForFDESymbol(Sym, Encoding, Streamer); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h index 84de55145b65..12386f14443e 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h @@ -17,12 +17,12 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { -class StringRef; +class Triple; class SparcELFMCAsmInfo : public MCAsmInfoELF { void anchor() override; public: - explicit SparcELFMCAsmInfo(StringRef TT); + explicit SparcELFMCAsmInfo(const Triple &TheTriple); const MCExpr* getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const override; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index 34079eea7885..9171d4dc9c00 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -86,16 +86,10 @@ void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, if (Ctx.getAsmInfo()->isLittleEndian()) { // Output the bits in little-endian byte order. - for (unsigned i = 0; i != 4; ++i) { - OS << (char)Bits; - Bits >>= 8; - } + support::endian::Writer<support::little>(OS).write<uint32_t>(Bits); } else { // Output the bits in big-endian byte order. 
- for (unsigned i = 0; i != 4; ++i) { - OS << (char)(Bits >> 24); - Bits <<= 8; - } + support::endian::Writer<support::big>(OS).write<uint32_t>(Bits); } unsigned tlsOpNo = 0; switch (MI.getOpcode()) { @@ -137,7 +131,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, } int64_t Res; - if (Expr->EvaluateAsAbsolute(Res)) + if (Expr->evaluateAsAbsolute(Res)) return Res; llvm_unreachable("Unhandled expression!"); diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index d97e3a25c5a7..e85a8cd5e339 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -15,9 +15,8 @@ #include "SparcMCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Object/ELF.h" @@ -26,20 +25,17 @@ using namespace llvm; #define DEBUG_TYPE "sparcmcexpr" const SparcMCExpr* -SparcMCExpr::Create(VariantKind Kind, const MCExpr *Expr, +SparcMCExpr::create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx) { return new (Ctx) SparcMCExpr(Kind, Expr); } - - -void SparcMCExpr::PrintImpl(raw_ostream &OS) const -{ +void SparcMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { bool closeParen = printVariantKind(OS, Kind); const MCExpr *Expr = getSubExpr(); - Expr->print(OS); + Expr->print(OS, MAI); if (closeParen) OS << ')'; @@ -160,10 +156,10 @@ Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) { } bool -SparcMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, +SparcMCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const { - return getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup); + return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup); } static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { @@ -184,8 +180,7 @@ static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { case MCExpr::SymbolRef: { const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); - MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); - MCELF::SetType(SD, ELF::STT_TLS); + cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS); break; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h index 116e10406a7c..d08ad86dbe04 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h @@ -69,7 +69,7 @@ public: /// @name Construction /// @{ - static const SparcMCExpr *Create(VariantKind Kind, const MCExpr *Expr, + static const SparcMCExpr *create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx); /// @} /// @name Accessors @@ -85,13 +85,13 @@ public: Sparc::Fixups getFixupKind() const { return getFixupKind(Kind); } /// @} - void PrintImpl(raw_ostream &OS) const override; - bool EvaluateAsRelocatableImpl(MCValue &Res, + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override; void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *FindAssociatedSection() const override { - return getSubExpr()->FindAssociatedSection(); + MCSection *findAssociatedSection() const override { + return getSubExpr()->findAssociatedSection(); } void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; diff 
--git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 4d5672e29550..d34c87977168 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -34,7 +34,7 @@ using namespace llvm; #include "SparcGenRegisterInfo.inc" static MCAsmInfo *createSparcMCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { + const Triple &TT) { MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT); unsigned Reg = MRI.getDwarfRegNum(SP::O6, true); MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0); @@ -43,7 +43,7 @@ static MCAsmInfo *createSparcMCAsmInfo(const MCRegisterInfo &MRI, } static MCAsmInfo *createSparcV9MCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { + const Triple &TT) { MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT); unsigned Reg = MRI.getDwarfRegNum(SP::O6, true); MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 2047); diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 9903bc5799da..c5f046bfc5bb 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -78,9 +78,9 @@ namespace { static MCOperand createSparcMCOperand(SparcMCExpr::VariantKind Kind, MCSymbol *Sym, MCContext &OutContext) { - const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Sym, + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Sym, OutContext); - const SparcMCExpr *expr = SparcMCExpr::Create(Kind, MCSym, OutContext); + const SparcMCExpr *expr = SparcMCExpr::create(Kind, MCSym, OutContext); return MCOperand::createExpr(expr); } @@ -94,15 +94,15 @@ static MCOperand createPCXRelExprOp(SparcMCExpr::VariantKind Kind, MCSymbol *CurLabel, MCContext &OutContext) { - const MCSymbolRefExpr *GOT = MCSymbolRefExpr::Create(GOTLabel, OutContext); - const MCSymbolRefExpr *Start = MCSymbolRefExpr::Create(StartLabel, + const MCSymbolRefExpr *GOT = MCSymbolRefExpr::create(GOTLabel, OutContext); + const MCSymbolRefExpr *Start = MCSymbolRefExpr::create(StartLabel, OutContext); - const MCSymbolRefExpr *Cur = MCSymbolRefExpr::Create(CurLabel, + const MCSymbolRefExpr *Cur = MCSymbolRefExpr::create(CurLabel, OutContext); - const MCBinaryExpr *Sub = MCBinaryExpr::CreateSub(Cur, Start, OutContext); - const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(GOT, Sub, OutContext); - const SparcMCExpr *expr = SparcMCExpr::Create(Kind, + const MCBinaryExpr *Sub = MCBinaryExpr::createSub(Cur, Start, OutContext); + const MCBinaryExpr *Add = MCBinaryExpr::createAdd(GOT, Sub, OutContext); + const SparcMCExpr *expr = SparcMCExpr::create(Kind, Add, OutContext); return MCOperand::createExpr(expr); } @@ -199,7 +199,7 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI, EmitHiLo(*OutStreamer, GOTLabel, SparcMCExpr::VK_Sparc_H44, SparcMCExpr::VK_Sparc_M44, MCRegOP, OutContext, STI); - MCOperand imm = MCOperand::createExpr(MCConstantExpr::Create(12, + MCOperand imm = MCOperand::createExpr(MCConstantExpr::create(12, OutContext)); EmitSHL(*OutStreamer, MCRegOP, imm, MCRegOP, STI); MCOperand lo = createSparcMCOperand(SparcMCExpr::VK_Sparc_L44, @@ -211,7 +211,7 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI, EmitHiLo(*OutStreamer, GOTLabel, SparcMCExpr::VK_Sparc_HH, SparcMCExpr::VK_Sparc_HM, MCRegOP, OutContext, STI); - MCOperand imm = MCOperand::createExpr(MCConstantExpr::Create(32, + MCOperand imm = MCOperand::createExpr(MCConstantExpr::create(32, OutContext)); EmitSHL(*OutStreamer, 
MCRegOP, imm, MCRegOP, STI); // Use register %o7 to load the lower 32 bits. @@ -361,10 +361,10 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, O << (int)MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: - O << *getSymbol(MO.getGlobal()); + getSymbol(MO.getGlobal())->print(O, MAI); break; case MachineOperand::MO_BlockAddress: O << GetBlockAddressSymbol(MO.getBlockAddress())->getName(); diff --git a/lib/Target/Sparc/SparcMCInstLower.cpp b/lib/Target/Sparc/SparcMCInstLower.cpp index 9388d594973c..b084d0021ba0 100644 --- a/lib/Target/Sparc/SparcMCInstLower.cpp +++ b/lib/Target/Sparc/SparcMCInstLower.cpp @@ -59,9 +59,9 @@ static MCOperand LowerSymbolOperand(const MachineInstr *MI, break; } - const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, AP.OutContext); - const SparcMCExpr *expr = SparcMCExpr::Create(Kind, MCSym, + const SparcMCExpr *expr = SparcMCExpr::create(Kind, MCSym, AP.OutContext); return MCOperand::createExpr(expr); } diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp index 32b2240f87ea..412e124f9a26 100644 --- a/lib/Target/Sparc/SparcTargetObjectFile.cpp +++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp @@ -34,8 +34,8 @@ const MCExpr *SparcELFTargetObjectFile::getTTypeGlobalReference( } MCContext &Ctx = getContext(); - return SparcMCExpr::Create(SparcMCExpr::VK_Sparc_R_DISP32, - MCSymbolRefExpr::Create(SSym, Ctx), Ctx); + return SparcMCExpr::create(SparcMCExpr::VK_Sparc_R_DISP32, + MCSymbolRefExpr::create(SSym, Ctx), Ctx); } return TargetLoweringObjectFileELF::getTTypeGlobalReference( diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index b721def54e12..3aa4c6bd32d6 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -865,9 +865,9 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, } MCSymbol *Sym = Ctx.createTempSymbol(); Out.EmitLabel(Sym); - const MCExpr *Base = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); - Expr = Value == 0 ? Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx); + Expr = Value == 0 ? Base : MCBinaryExpr::createAdd(Base, Expr, Ctx); } // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol. 
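// A self-contained sketch of the anchoring pattern used in the hunk
// above, assuming the MC API at this revision; anchorPCRel is a
// hypothetical helper, not part of the patch. An absolute pc-relative
// operand is pinned to a fresh temporary label, and any nonzero offset
// expression is re-added on top of it:
static const llvm::MCExpr *anchorPCRel(llvm::MCContext &Ctx,
                                       llvm::MCStreamer &Out,
                                       const llvm::MCExpr *Expr,
                                       int64_t Value) {
  llvm::MCSymbol *Sym = Ctx.createTempSymbol();
  Out.EmitLabel(Sym);  // the label marks "here" in the output stream
  const llvm::MCExpr *Base = llvm::MCSymbolRefExpr::create(
      Sym, llvm::MCSymbolRefExpr::VK_None, Ctx);
  return Value == 0 ? Base : llvm::MCBinaryExpr::createAdd(Base, Expr, Ctx);
}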
@@ -904,7 +904,7 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, } StringRef Identifier = Parser.getTok().getString(); - Sym = MCSymbolRefExpr::Create(Ctx.getOrCreateSymbol(Identifier), + Sym = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(Identifier), Kind, Ctx); Parser.Lex(); } diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index 373ddfa7e257..059ae3f7fb09 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -37,13 +37,14 @@ void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp, } } -void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) { +void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI, + raw_ostream &O) { if (MO.isReg()) O << '%' << getRegisterName(MO.getReg()); else if (MO.isImm()) O << MO.getImm(); else if (MO.isExpr()) - O << *MO.getExpr(); + MO.getExpr()->print(O, MAI); else llvm_unreachable("Invalid operand"); } @@ -147,7 +148,7 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum, O << "0x"; O.write_hex(MO.getImm()); } else - O << *MO.getExpr(); + MO.getExpr()->print(O, &MAI); } void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum, @@ -175,7 +176,7 @@ void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum, void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - printOperand(MI->getOperand(OpNum), O); + printOperand(MI->getOperand(OpNum), &MAI, O); } void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum, diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index 847b6962e6f2..ba55e686f3ef 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -35,7 +35,8 @@ public: raw_ostream &O); // Print the given operand. - static void printOperand(const MCOperand &MO, raw_ostream &O); + static void printOperand(const MCOperand &MO, const MCAsmInfo *MAI, + raw_ostream &O); // Override MCInstPrinter. 
void printRegName(raw_ostream &O, unsigned RegNo) const override; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 1c3887ab5456..0e8a680d4dd4 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -105,7 +105,7 @@ void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, bool SystemZMCAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { for (uint64_t I = 0; I != Count; ++I) - OW->Write8(7); + OW->write8(7); return true; } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index 0161d6263e7d..b17977d41be1 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -13,7 +13,7 @@ using namespace llvm; -SystemZMCAsmInfo::SystemZMCAsmInfo(StringRef TT) { +SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) { PointerSize = 8; CalleeSaveStackSlotSize = 8; IsLittleEndian = false; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h index 19b5b4b09724..800f89232063 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h @@ -14,11 +14,11 @@ #include "llvm/Support/Compiler.h" namespace llvm { -class StringRef; +class Triple; class SystemZMCAsmInfo : public MCAsmInfoELF { public: - explicit SystemZMCAsmInfo(StringRef TT); + explicit SystemZMCAsmInfo(const Triple &TT); }; } // end namespace llvm diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index c9290c1922d3..fd52a2ebf2fd 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -217,7 +217,7 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, const MCOperand &MO = MI.getOperand(OpNum); const MCExpr *Expr; if (MO.isImm()) - Expr = MCConstantExpr::Create(MO.getImm() + Offset, Ctx); + Expr = MCConstantExpr::create(MO.getImm() + Offset, Ctx); else { Expr = MO.getExpr(); if (Offset) { @@ -225,8 +225,8 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, // is relative to the operand field itself, which is Offset bytes // into MI. Add Offset to the relocation value to cancel out // this difference. 
- const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); - Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Ctx); + Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx); } } Fixups.push_back(MCFixup::create(Offset, Expr, (MCFixupKind)Kind)); diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 8c2075afe505..92681cf6e44b 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -132,7 +132,7 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) { } static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { + const Triple &TT) { MCAsmInfo *MAI = new SystemZMCAsmInfo(TT); MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index a0d079fcc359..3dca7bd89f05 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -68,14 +68,14 @@ static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) { static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) { StringRef Name = "__tls_get_offset"; - return MCSymbolRefExpr::Create(Context.getOrCreateSymbol(Name), + return MCSymbolRefExpr::create(Context.getOrCreateSymbol(Name), MCSymbolRefExpr::VK_PLT, Context); } static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) { StringRef Name = "_GLOBAL_OFFSET_TABLE_"; - return MCSymbolRefExpr::Create(Context.getOrCreateSymbol(Name), + return MCSymbolRefExpr::create(Context.getOrCreateSymbol(Name), MCSymbolRefExpr::VK_None, Context); } @@ -285,7 +285,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { auto *ZCPV = static_cast<SystemZConstantPoolValue*>(MCPV); const MCExpr *Expr = - MCSymbolRefExpr::Create(getSymbol(ZCPV->getGlobalValue()), + MCSymbolRefExpr::create(getSymbol(ZCPV->getGlobalValue()), getModifierVariantKind(ZCPV->getModifier()), OutContext); uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType()); @@ -305,7 +305,7 @@ bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, } else { SystemZMCInstLower Lower(MF->getContext(), *this); MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo))); - SystemZInstPrinter::printOperand(MO, OS); + SystemZInstPrinter::printOperand(MO, MAI, OS); } return false; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 24b5a41d7f67..91e12c2d9d7e 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -506,9 +506,10 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, *Fast = true; return true; } - + bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // Punt on globals for now, although they can be used in limited // RELATIVE LONG cases. 
if (AM.BaseGV) diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index b001abc693d6..2f7617bbdac3 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -369,7 +369,8 @@ public: bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; bool isLegalICmpImmediate(int64_t Imm) const override; bool isLegalAddImmediate(int64_t Imm) const override; - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *Fast) const override; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 90598852b5ed..4346850e0ac5 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -752,10 +752,9 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return nullptr; } -MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - int FrameIndex) const { +MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Size = MFI->getObjectSize(FrameIndex); unsigned Opcode = MI->getOpcode(); @@ -765,9 +764,11 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, isInt<8>(MI->getOperand(2).getImm()) && !MI->getOperand(3).getReg()) { // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST - return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::AGSI)) - .addFrameIndex(FrameIndex).addImm(0) - .addImm(MI->getOperand(2).getImm()); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(SystemZ::AGSI)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(MI->getOperand(2).getImm()); } return nullptr; } @@ -786,9 +787,11 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, isInt<8>(MI->getOperand(2).getImm())) { // A(G)HI %reg, CONST -> A(G)SI %mem, CONST Opcode = (Opcode == SystemZ::AHI ? SystemZ::ASI : SystemZ::AGSI); - return BuildMI(MF, MI->getDebugLoc(), get(Opcode)) - .addFrameIndex(FrameIndex).addImm(0) - .addImm(MI->getOperand(2).getImm()); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(Opcode)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(MI->getOperand(2).getImm()); } if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) { @@ -798,17 +801,23 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // source register instead. if (OpNum == 0) { unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD; - return BuildMI(MF, MI->getDebugLoc(), get(StoreOpcode)) - .addOperand(MI->getOperand(1)).addFrameIndex(FrameIndex) - .addImm(0).addReg(0); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(StoreOpcode)) + .addOperand(MI->getOperand(1)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addReg(0); } // If we're spilling the source of an LDGR or LGDR, load the // destination register instead. if (OpNum == 1) { unsigned LoadOpcode = Op0IsGPR ? 
SystemZ::LG : SystemZ::LD; unsigned Dest = MI->getOperand(0).getReg(); - return BuildMI(MF, MI->getDebugLoc(), get(LoadOpcode), Dest) - .addFrameIndex(FrameIndex).addImm(0).addReg(0); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(LoadOpcode), Dest) + .addFrameIndex(FrameIndex) + .addImm(0) + .addReg(0); } } @@ -830,17 +839,25 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (MMO->getSize() == Size && !MMO->isVolatile()) { // Handle conversion of loads. if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) { - return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) - .addFrameIndex(FrameIndex).addImm(0).addImm(Size) - .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) - .addMemOperand(MMO); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(SystemZ::MVC)) + .addFrameIndex(FrameIndex) + .addImm(0) + .addImm(Size) + .addOperand(MI->getOperand(1)) + .addImm(MI->getOperand(2).getImm()) + .addMemOperand(MMO); } // Handle conversion of stores. if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) { - return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) - .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) - .addImm(Size).addFrameIndex(FrameIndex).addImm(0) - .addMemOperand(MMO); + return BuildMI(*InsertPt->getParent(), InsertPt, MI->getDebugLoc(), + get(SystemZ::MVC)) + .addOperand(MI->getOperand(1)) + .addImm(MI->getOperand(2).getImm()) + .addImm(Size) + .addFrameIndex(FrameIndex) + .addImm(0) + .addMemOperand(MMO); } } } @@ -856,7 +873,8 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, assert(AccessBytes != 0 && "Size of access should be known"); assert(AccessBytes <= Size && "Access outside the frame index"); uint64_t Offset = Size - AccessBytes; - MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(MemOpcode)); + MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, + MI->getDebugLoc(), get(MemOpcode)); for (unsigned I = 0; I < OpNum; ++I) MIB.addOperand(MI->getOperand(I)); MIB.addFrameIndex(FrameIndex).addImm(Offset); @@ -869,10 +887,9 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return nullptr; } -MachineInstr * -SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - ArrayRef<unsigned> Ops, - MachineInstr *LoadMI) const { +MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const { return nullptr; } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index b55810b253f1..e47f2ee9d0b6 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -187,9 +187,11 @@ public: LiveVariables *LV) const override; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const override; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const override; bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp index a1dcedab54e7..2655e4866b20 100644 --- 
a/lib/Target/SystemZ/SystemZMCInstLower.cpp +++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -68,11 +68,11 @@ SystemZMCInstLower::getExpr(const MachineOperand &MO, default: llvm_unreachable("unknown operand type"); } - const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, Ctx); if (HasOffset) if (int64_t Offset = MO.getOffset()) { - const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); - Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + const MCExpr *OffsetExpr = MCConstantExpr::create(Offset, Ctx); + Expr = MCBinaryExpr::createAdd(Expr, OffsetExpr, Ctx); } return Expr; } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index a184b92d3c9f..d498bb104ef8 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -313,7 +313,7 @@ const MCExpr *TargetLoweringObjectFile::getTTypeGlobalReference( const TargetMachine &TM, MachineModuleInfo *MMI, MCStreamer &Streamer) const { const MCSymbolRefExpr *Ref = - MCSymbolRefExpr::Create(TM.getSymbol(GV, Mang), getContext()); + MCSymbolRefExpr::create(TM.getSymbol(GV, Mang), getContext()); return getTTypeReference(Ref, Encoding, Streamer); } @@ -332,8 +332,8 @@ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding, // .-foo addressing. MCSymbol *PCSym = getContext().createTempSymbol(); Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); - return MCBinaryExpr::CreateSub(Sym, PC, getContext()); + const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext()); + return MCBinaryExpr::createSub(Sym, PC, getContext()); } } } @@ -341,7 +341,7 @@ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding, const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const { // FIXME: It's not clear what, if any, default this should have - perhaps a // null return could mean 'no location' & we should just do that here. - return MCSymbolRefExpr::Create(Sym, *Ctx); + return MCSymbolRefExpr::create(Sym, *Ctx); } void TargetLoweringObjectFile::getNameWithPrefix( diff --git a/lib/Target/TargetRecip.cpp b/lib/Target/TargetRecip.cpp new file mode 100644 index 000000000000..42bc487fe6d8 --- /dev/null +++ b/lib/Target/TargetRecip.cpp @@ -0,0 +1,225 @@ +//===-------------------------- TargetRecip.cpp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class is used to customize machine-specific reciprocal estimate code +// generation in a target-independent way. +// If a target does not support operations in this specification, then code +// generation will default to using supported operations. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetRecip.h" +#include <map> + +using namespace llvm; + +// These are the names of the individual reciprocal operations. These are +// the key strings for queries and command-line inputs. +// In addition, the command-line interface recognizes the global parameters +// "all", "none", and "default". 
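The header comment above describes the -recip string grammar that the RecipOps table just below implements: an operation key, an optional ':' refinement-step digit, and an optional '!' disablement prefix. A minimal usage sketch, assuming only the TargetRecip constructor and query methods defined later in this new file (the option strings are examples, not defaults from the commit):

    #include "llvm/Target/TargetRecip.h"
    #include <string>
    #include <vector>
    using namespace llvm;

    int main() {
      // Global form: enable every estimate type, one refinement step each.
      TargetRecip All(std::vector<std::string>{"all:1"});

      // Individual form: scalar float division estimates with two steps;
      // the '!' prefix disables double square-root estimates.
      TargetRecip Some(std::vector<std::string>{"divf:2", "!sqrtd"});

      return (Some.isEnabled("divf") && !Some.isEnabled("sqrtd")) ? 0 : 1;
    }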
+static const char *RecipOps[] = { + "divd", + "divf", + "vec-divd", + "vec-divf", + "sqrtd", + "sqrtf", + "vec-sqrtd", + "vec-sqrtf", +}; + +// The uninitialized state is needed for the enabled settings and refinement +// steps because custom settings may arrive via the command-line before target +// defaults are set. +TargetRecip::TargetRecip() { + unsigned NumStrings = llvm::array_lengthof(RecipOps); + for (unsigned i = 0; i < NumStrings; ++i) + RecipMap.insert(std::make_pair(RecipOps[i], RecipParams())); +} + +static bool parseRefinementStep(const StringRef &In, size_t &Position, + uint8_t &Value) { + const char RefStepToken = ':'; + Position = In.find(RefStepToken); + if (Position == StringRef::npos) + return false; + + StringRef RefStepString = In.substr(Position + 1); + // Allow exactly one numeric character for the additional refinement + // step parameter. + if (RefStepString.size() == 1) { + char RefStepChar = RefStepString[0]; + if (RefStepChar >= '0' && RefStepChar <= '9') { + Value = RefStepChar - '0'; + return true; + } + } + report_fatal_error("Invalid refinement step for -recip."); +} + +bool TargetRecip::parseGlobalParams(const std::string &Arg) { + StringRef ArgSub = Arg; + + // Look for an optional setting of the number of refinement steps needed + // for this type of reciprocal operation. + size_t RefPos; + uint8_t RefSteps; + StringRef RefStepString; + if (parseRefinementStep(ArgSub, RefPos, RefSteps)) { + // Split the string for further processing. + RefStepString = ArgSub.substr(RefPos + 1); + ArgSub = ArgSub.substr(0, RefPos); + } + bool Enable; + bool UseDefaults; + if (ArgSub == "all") { + UseDefaults = false; + Enable = true; + } else if (ArgSub == "none") { + UseDefaults = false; + Enable = false; + } else if (ArgSub == "default") { + UseDefaults = true; + } else { + // Any other string is invalid or an individual setting. + return false; + } + + // All enable values will be initialized to target defaults if 'default' was + // specified. + if (!UseDefaults) + for (auto &KV : RecipMap) + KV.second.Enabled = Enable; + + // Custom refinement count was specified with all, none, or default. + if (!RefStepString.empty()) + for (auto &KV : RecipMap) + KV.second.RefinementSteps = RefSteps; + + return true; +} + +void TargetRecip::parseIndividualParams(const std::vector<std::string> &Args) { + static const char DisabledPrefix = '!'; + unsigned NumArgs = Args.size(); + + for (unsigned i = 0; i != NumArgs; ++i) { + StringRef Val = Args[i]; + + bool IsDisabled = Val[0] == DisabledPrefix; + // Ignore the disablement token for string matching. + if (IsDisabled) + Val = Val.substr(1); + + size_t RefPos; + uint8_t RefSteps; + StringRef RefStepString; + if (parseRefinementStep(Val, RefPos, RefSteps)) { + // Split the string for further processing. + RefStepString = Val.substr(RefPos + 1); + Val = Val.substr(0, RefPos); + } + + RecipIter Iter = RecipMap.find(Val); + if (Iter == RecipMap.end()) { + // Try again specifying float suffix. + Iter = RecipMap.find(Val.str() + 'f'); + if (Iter == RecipMap.end()) { + Iter = RecipMap.find(Val.str() + 'd'); + assert(Iter == RecipMap.end() && "Float entry missing from map"); + report_fatal_error("Invalid option for -recip."); + } + + // The option was specified without a float or double suffix. + if (RecipMap[Val.str() + 'd'].Enabled != Uninitialized) { + // Make sure that the double entry was not already specified. + // The float entry will be checked below. 
+ report_fatal_error("Duplicate option for -recip."); + } + } + + if (Iter->second.Enabled != Uninitialized) + report_fatal_error("Duplicate option for -recip."); + + // Mark the matched option as found. Do not allow duplicate specifiers. + Iter->second.Enabled = !IsDisabled; + if (!RefStepString.empty()) + Iter->second.RefinementSteps = RefSteps; + + // If the precision was not specified, the double entry is also initialized. + if (Val.back() != 'f' && Val.back() != 'd') { + RecipMap[Val.str() + 'd'].Enabled = !IsDisabled; + if (!RefStepString.empty()) + RecipMap[Val.str() + 'd'].RefinementSteps = RefSteps; + } + } +} + +TargetRecip::TargetRecip(const std::vector<std::string> &Args) : + TargetRecip() { + unsigned NumArgs = Args.size(); + + // Check if "all", "default", or "none" was specified. + if (NumArgs == 1 && parseGlobalParams(Args[0])) + return; + + parseIndividualParams(Args); +} + +bool TargetRecip::isEnabled(const StringRef &Key) const { + ConstRecipIter Iter = RecipMap.find(Key); + assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); + assert(Iter->second.Enabled != Uninitialized && + "Enablement setting was not initialized"); + return Iter->second.Enabled; +} + +unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const { + ConstRecipIter Iter = RecipMap.find(Key); + assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); + assert(Iter->second.RefinementSteps != Uninitialized && + "Refinement step setting was not initialized"); + return Iter->second.RefinementSteps; +} + +/// Custom settings (previously initialized values) override target defaults. +void TargetRecip::setDefaults(const StringRef &Key, bool Enable, + unsigned RefSteps) { + if (Key == "all") { + for (auto &KV : RecipMap) { + RecipParams &RP = KV.second; + if (RP.Enabled == Uninitialized) + RP.Enabled = Enable; + if (RP.RefinementSteps == Uninitialized) + RP.RefinementSteps = RefSteps; + } + } else { + RecipParams &RP = RecipMap[Key]; + if (RP.Enabled == Uninitialized) + RP.Enabled = Enable; + if (RP.RefinementSteps == Uninitialized) + RP.RefinementSteps = RefSteps; + } +} + +bool TargetRecip::operator==(const TargetRecip &Other) const { + for (const auto &KV : RecipMap) { + const StringRef &Op = KV.first; + const RecipParams &RP = KV.second; + const RecipParams &OtherRP = Other.RecipMap.find(Op)->second; + if (RP.RefinementSteps != OtherRP.RefinementSteps) + return false; + if (RP.Enabled != OtherRP.Enabled) + return false; + } + return true; +} diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp index a21f8c723503..9eee4a0f3d82 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -315,7 +315,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg, // Test (%SrcReg) { - const MCExpr *Disp = MCConstantExpr::Create(0, Ctx); + const MCExpr *Disp = MCConstantExpr::create(0, Ctx); std::unique_ptr<X86Operand> Op(X86Operand::CreateMem( getPointerWidth(), 0, Disp, SrcReg, 0, AccessSize, SMLoc(), SMLoc())); InstrumentMemOperand(*Op, AccessSize, false /* IsWrite */, RegCtx, Ctx, @@ -324,7 +324,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg, // Test -1(%SrcReg, %CntReg, AccessSize) { - const MCExpr *Disp = MCConstantExpr::Create(-1, Ctx); + const MCExpr *Disp = MCConstantExpr::create(-1, Ctx); std::unique_ptr<X86Operand> Op(X86Operand::CreateMem( getPointerWidth(), 0, Disp, SrcReg, CntReg, 
AccessSize, SMLoc(), SMLoc())); @@ -334,7 +334,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg, // Test (%DstReg) { - const MCExpr *Disp = MCConstantExpr::Create(0, Ctx); + const MCExpr *Disp = MCConstantExpr::create(0, Ctx); std::unique_ptr<X86Operand> Op(X86Operand::CreateMem( getPointerWidth(), 0, Disp, DstReg, 0, AccessSize, SMLoc(), SMLoc())); InstrumentMemOperand(*Op, AccessSize, true /* IsWrite */, RegCtx, Ctx, Out); @@ -342,7 +342,7 @@ void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg, // Test -1(%DstReg, %CntReg, AccessSize) { - const MCExpr *Disp = MCConstantExpr::Create(-1, Ctx); + const MCExpr *Disp = MCConstantExpr::create(-1, Ctx); std::unique_ptr<X86Operand> Op(X86Operand::CreateMem( getPointerWidth(), 0, Disp, DstReg, CntReg, AccessSize, SMLoc(), SMLoc())); @@ -461,7 +461,7 @@ void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op, while (Residue != 0) { const MCConstantExpr *Disp = - MCConstantExpr::Create(ApplyDisplacementBounds(Residue), Ctx); + MCConstantExpr::create(ApplyDisplacementBounds(Residue), Ctx); std::unique_ptr<X86Operand> DispOp = X86Operand::CreateMem(getPointerWidth(), 0, Disp, Reg, 0, 1, SMLoc(), SMLoc()); @@ -493,7 +493,7 @@ X86AddressSanitizer::AddDisplacement(X86Operand &Op, int64_t Displacement, CheckDisplacementBounds(NewDisplacement); *Residue = Displacement - NewDisplacement; - const MCExpr *Disp = MCConstantExpr::Create(NewDisplacement, Ctx); + const MCExpr *Disp = MCConstantExpr::create(NewDisplacement, Ctx); return X86Operand::CreateMem(Op.getMemModeSize(), Op.getMemSegReg(), Disp, Op.getMemBaseReg(), Op.getMemIndexReg(), Op.getMemScale(), SMLoc(), SMLoc()); @@ -615,7 +615,7 @@ private: const std::string &Fn = FuncName(AccessSize, IsWrite); MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn)); const MCSymbolRefExpr *FnExpr = - MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx); + MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx); EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr)); } }; @@ -643,7 +643,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall( MCInst Inst; Inst.setOpcode(X86::MOV8rm); Inst.addOperand(MCOperand::createReg(ShadowRegI8)); - const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx); + const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1, SMLoc(), SMLoc())); @@ -654,7 +654,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall( EmitInstruction( Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8)); MCSymbol *DoneSym = Ctx.createTempSymbol(); - const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx); EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr)); EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg( @@ -669,7 +669,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall( case 1: break; case 2: { - const MCExpr *Disp = MCConstantExpr::Create(1, Ctx); + const MCExpr *Disp = MCConstantExpr::create(1, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1, SMLoc(), SMLoc())); @@ -720,7 +720,7 @@ void X86AddressSanitizer32::InstrumentMemOperandLarge( Inst.setOpcode(X86::CMP16mi); break; } - const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx); + const MCExpr *Disp = 
MCConstantExpr::create(kShadowOffset, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1, SMLoc(), SMLoc())); @@ -729,7 +729,7 @@ void X86AddressSanitizer32::InstrumentMemOperandLarge( EmitInstruction(Out, Inst); } MCSymbol *DoneSym = Ctx.createTempSymbol(); - const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx); EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr)); EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx); @@ -743,7 +743,7 @@ void X86AddressSanitizer32::InstrumentMOVSImpl(unsigned AccessSize, // No need to test when ECX is equals to zero. MCSymbol *DoneSym = Ctx.createTempSymbol(); - const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx); EmitInstruction( Out, MCInstBuilder(X86::TEST32rr).addReg(X86::ECX).addReg(X86::ECX)); EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr)); @@ -860,7 +860,7 @@ public: private: void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) { - const MCExpr *Disp = MCConstantExpr::Create(Offset, Ctx); + const MCExpr *Disp = MCConstantExpr::create(Offset, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc())); @@ -885,7 +885,7 @@ private: const std::string &Fn = FuncName(AccessSize, IsWrite); MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn)); const MCSymbolRefExpr *FnExpr = - MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx); + MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx); EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr)); } }; @@ -914,7 +914,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall( MCInst Inst; Inst.setOpcode(X86::MOV8rm); Inst.addOperand(MCOperand::createReg(ShadowRegI8)); - const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx); + const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1, SMLoc(), SMLoc())); @@ -925,7 +925,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall( EmitInstruction( Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8)); MCSymbol *DoneSym = Ctx.createTempSymbol(); - const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx); EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr)); EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg( @@ -940,7 +940,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall( case 1: break; case 2: { - const MCExpr *Disp = MCConstantExpr::Create(1, Ctx); + const MCExpr *Disp = MCConstantExpr::create(1, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1, SMLoc(), SMLoc())); @@ -991,7 +991,7 @@ void X86AddressSanitizer64::InstrumentMemOperandLarge( Inst.setOpcode(X86::CMP16mi); break; } - const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx); + const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1, SMLoc(), SMLoc())); @@ -1001,7 +1001,7 @@ void X86AddressSanitizer64::InstrumentMemOperandLarge( } MCSymbol *DoneSym = Ctx.createTempSymbol(); - const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, 
Ctx); + const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx); EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr)); EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx); @@ -1015,7 +1015,7 @@ void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize, // No need to test when RCX is equals to zero. MCSymbol *DoneSym = Ctx.createTempSymbol(); - const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx); EmitInstruction( Out, MCInstBuilder(X86::TEST64rr).addReg(X86::RCX).addReg(X86::RCX)); EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr)); diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 3047fd1078a9..e8965710f022 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -973,7 +973,7 @@ void X86AsmParser::SetFrameRegister(unsigned RegNo) { std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { unsigned basereg = is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI); - const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); + const MCExpr *Disp = MCConstantExpr::create(0, getContext()); return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0); @@ -982,7 +982,7 @@ std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { unsigned basereg = is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI); - const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); + const MCExpr *Disp = MCConstantExpr::create(0, getContext()); return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0); @@ -1195,7 +1195,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b"); MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; const MCExpr *Val = - MCSymbolRefExpr::Create(Sym, Variant, getContext()); + MCSymbolRefExpr::create(Sym, Variant, getContext()); if (IDVal == "b" && Sym->isUndefined()) return Error(Loc, "invalid reference to undefined symbol"); StringRef Identifier = Sym->getName(); @@ -1265,9 +1265,9 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, } if (SM.getImm() || !Disp) { - const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext()); + const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext()); if (Disp) - Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext()); + Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext()); else Disp = Imm; // An immediate displacement only. } @@ -1354,7 +1354,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, // Create the symbol reference. MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext()); + Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext()); return false; } @@ -1382,7 +1382,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, // An immediate following a 'segment register', 'colon' token sequence can // be followed by a bracketed expression. If it isn't we know we have our // final segment override. 
- const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext()); + const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext()); return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1, Start, ImmDispToken.getEndLoc(), Size); @@ -1435,7 +1435,7 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) { return ErrorOperand(Tok.getLoc(), "Expected } at this point"); Parser.Lex(); // Eat "}" const MCExpr *RndModeOp = - MCConstantExpr::Create(rndMode, Parser.getContext()); + MCConstantExpr::create(rndMode, Parser.getContext()); return X86Operand::CreateImm(RndModeOp, Start, End); } if(Tok.getIdentifier().equals("sae")){ @@ -1499,7 +1499,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, return nullptr; } - const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext()); + const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext()); // BaseReg is non-zero to avoid assertions. In the context of inline asm, // we're pointing to a local variable in memory, so the base register is // really the frame or stack pointer. @@ -1549,7 +1549,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, Val)); } - NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); + NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext()); return false; } @@ -1623,7 +1623,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) { unsigned Len = End.getPointer() - TypeLoc.getPointer(); InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); - const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); + const MCExpr *Imm = MCConstantExpr::create(CVal, getContext()); return X86Operand::CreateImm(Imm, Start, End); } @@ -1683,7 +1683,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() { return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End, Size); - const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext()); + const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext()); return X86Operand::CreateImm(ImmExpr, Start, End); } @@ -1841,7 +1841,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg, // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The // only way to do this without lookahead is to eat the '(' and see what is // after it. 
- const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); + const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext()); if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; if (getParser().parseExpression(Disp, ExprEnd)) return nullptr; @@ -2061,7 +2061,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx), NameLoc)); - const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode, + const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, getParser().getContext()); Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); @@ -2088,7 +2088,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) { Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc)); - const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode, + const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, getParser().getContext()); Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); @@ -2115,7 +2115,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (ComparisonCode != ~0U) { Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc)); - const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode, + const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode, getParser().getContext()); Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc)); @@ -2375,7 +2375,7 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { X86Operand &Op = static_cast<X86Operand &>(*Ops[1]); assert(Op.isImm() && "expected immediate"); int64_t Res; - if (!Op.getImm()->EvaluateAsAbsolute(Res) || Res > 255) { + if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) { Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]"); return false; } diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 3469d19f4fd2..6e99c37c2bc7 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -546,6 +546,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, case TYPE_XMM512: mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4))); return; + case TYPE_BNDR: + mcInst.addOperand(MCOperand::createReg(X86::BND0 + (immediate >> 4))); case TYPE_REL8: isBranch = true; pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; @@ -827,6 +829,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_VK16: case TYPE_DEBUGREG: case TYPE_CONTROLREG: + case TYPE_BNDR: return translateRMRegister(mcInst, insn); case TYPE_M: case TYPE_M8: diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 9e6505001393..301db72feafb 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -460,6 +460,7 @@ enum OperandEncoding { ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \ + ENUM_ENTRY(TYPE_BNDR, "MPX bounds register") \ \ ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \ ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \ diff --git 
a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index af4399a41a06..ea727e6e82fb 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -150,11 +150,11 @@ void X86ATTInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo, // that address in hex. const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) { O << formatHex((uint64_t)Address); } else { // Otherwise, just print the expression. - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } } @@ -178,7 +178,9 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << markup("<imm:") << '$' << *Op.getExpr() << markup(">"); + O << markup("<imm:") << '$'; + Op.getExpr()->print(O, &MAI); + O << markup(">"); } } @@ -203,7 +205,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, O << formatImm(DispVal); } else { assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); - O << *DispSpec.getExpr(); + DispSpec.getExpr()->print(O, &MAI); } if (IndexReg.getReg() || BaseReg.getReg()) { @@ -273,7 +275,7 @@ void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, O << formatImm(DispSpec.getImm()); } else { assert(DispSpec.isExpr() && "non-immediate displacement?"); - O << *DispSpec.getExpr(); + DispSpec.getExpr()->print(O, &MAI); } O << markup(">"); diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 4d92dafa938a..879378fc7a97 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -131,12 +131,12 @@ void X86IntelInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo, // that address in hex. const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) { O << formatHex((uint64_t)Address); } else { // Otherwise, just print the expression. 
- O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } } @@ -150,7 +150,7 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << formatImm((int64_t)Op.getImm()); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); + Op.getExpr()->print(O, &MAI); } } @@ -187,7 +187,7 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, if (!DispSpec.isImm()) { if (NeedPlus) O << " + "; assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); - O << *DispSpec.getExpr(); + DispSpec.getExpr()->print(O, &MAI); } else { int64_t DispVal = DispSpec.getImm(); if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { @@ -245,7 +245,7 @@ void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, O << formatImm(DispSpec.getImm()); } else { assert(DispSpec.isExpr() && "non-immediate displacement?"); - O << *DispSpec.getExpr(); + DispSpec.getExpr()->print(O, &MAI); } O << ']'; diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 6d4284dc518b..1ac656d4614b 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -326,7 +326,7 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { // FIXME: We could generated something better than plain 0x90. if (!HasNopl) { for (uint64_t i = 0; i < Count; ++i) - OW->Write8(0x90); + OW->write8(0x90); return true; } @@ -336,10 +336,10 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength); const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10; for (uint8_t i = 0; i < Prefixes; i++) - OW->Write8(0x66); + OW->write8(0x66); const uint8_t Rest = ThisNopLength - Prefixes; for (uint8_t i = 0; i < Rest; i++) - OW->Write8(Nops[Rest - 1][i]); + OW->write8(Nops[Rest - 1][i]); Count -= ThisNopLength; } while (Count != 0); diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 45088835cfb9..a33468dc4769 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -66,6 +66,7 @@ static X86_64RelType getType64(unsigned Kind, case X86::reloc_riprel_4byte: case X86::reloc_riprel_4byte_movq_load: return RT64_32; + case FK_PCRel_2: case FK_Data_2: return RT64_16; case FK_PCRel_1: diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp index a39def98e48e..2943dd383efa 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp @@ -31,13 +31,13 @@ public: StringRef SymName; SymI->getName(SymName); uint64_t SymAddr; SymI->getAddress(SymAddr); - uint64_t SymSize; SymI->getSize(SymSize); + uint64_t SymSize = SymI->getSize(); int64_t Addend; getELFRelocationAddend(Rel, Addend); MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName); // FIXME: check that the value is actually the same. if (!Sym->isVariable()) - Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx)); + Sym->setVariableValue(MCConstantExpr::create(SymAddr, Ctx)); const MCExpr *Expr = nullptr; // If hasAddend is true, then we need to add Addend (r_addend) to Expr. 
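The X86ELFRelocationInfo hunk above, and the R_X86_64_* cases that follow it, build the classic "S + A" (symbol plus r_addend) relocation expression with the renamed factories. A self-contained sketch of that recurring pattern; the helper name is hypothetical:

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"
    #include "llvm/MC/MCSymbol.h"
    using namespace llvm;

    // Symbol value plus r_addend; the addend term is added only when nonzero,
    // mirroring the "hasAddend && Addend != 0" check in the hunk above.
    static const MCExpr *symbolPlusAddend(const MCSymbol *Sym, int64_t Addend,
                                          MCContext &Ctx) {
      const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
      if (Addend == 0)
        return Expr;
      return MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Addend, Ctx),
                                     Ctx);
    }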
@@ -76,7 +76,7 @@ public: case R_X86_64_PC64: // S + A - P (P/pcrel is implicit) hasAddend = true; - Expr = MCSymbolRefExpr::Create(Sym, Ctx); + Expr = MCSymbolRefExpr::create(Sym, Ctx); break; case R_X86_64_GOT32: case R_X86_64_GOT64: @@ -85,27 +85,27 @@ public: case R_X86_64_GOTPLT64: // G + A hasAddend = true; - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, Ctx); break; case R_X86_64_PLT32: // L + A - P -> S@PLT + A hasAddend = true; - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_PLT, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_PLT, Ctx); break; case R_X86_64_GLOB_DAT: case R_X86_64_JUMP_SLOT: // S - Expr = MCSymbolRefExpr::Create(Sym, Ctx); + Expr = MCSymbolRefExpr::create(Sym, Ctx); break; case R_X86_64_GOTPCREL: case R_X86_64_GOTPCREL64: // G + GOT + A - P -> S@GOTPCREL + A hasAddend = true; - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx); break; case R_X86_64_GOTOFF64: // S + A - GOT - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTOFF, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTOFF, Ctx); break; case R_X86_64_PLTOFF64: // L + A - GOT @@ -113,15 +113,15 @@ public: case R_X86_64_SIZE32: case R_X86_64_SIZE64: // Z + A - Expr = MCConstantExpr::Create(SymSize, Ctx); + Expr = MCConstantExpr::create(SymSize, Ctx); break; default: - Expr = MCSymbolRefExpr::Create(Sym, Ctx); + Expr = MCSymbolRefExpr::create(Sym, Ctx); break; } if (Expr && hasAddend && Addend != 0) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(Addend, Ctx), + Expr = MCBinaryExpr::createAdd(Expr, + MCConstantExpr::create(Addend, Ctx), Ctx); return Expr; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index bda35f2b9726..fc0b0f89e23d 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -119,9 +119,9 @@ X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym, MCStreamer &Streamer) const { MCContext &Context = Streamer.getContext(); const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context); - const MCExpr *Four = MCConstantExpr::Create(4, Context); - return MCBinaryExpr::CreateAdd(Res, Four, Context); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context); + const MCExpr *Four = MCConstantExpr::create(4, Context); + return MCBinaryExpr::createAdd(Res, Four, Context); } void X86MCAsmInfoMicrosoft::anchor() { } @@ -132,6 +132,11 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { PrivateLabelPrefix = ".L"; PointerSize = 8; WinEHEncodingType = WinEH::EncodingType::Itanium; + } else { + // 32-bit X86 doesn't use CFI, so this isn't a real encoding type. It's just + // a place holder that the Windows EHStreamer looks for to suppress CFI + // output. In particular, usesWindowsCFI() returns false. 
+ WinEHEncodingType = WinEH::EncodingType::X86; } ExceptionsType = ExceptionHandling::WinEH; diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 8aed7a4d9eb9..10c434c8b1b4 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -304,7 +304,7 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size, EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); return; } - Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx); + Expr = MCConstantExpr::create(DispOp.getImm(), Ctx); } else { Expr = DispOp.getExpr(); } @@ -351,7 +351,7 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size, ImmOffset -= 1; if (ImmOffset) - Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx), + Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(ImmOffset, Ctx), Ctx); // Emit a symbolic constant as a fixup and 4 zeros. diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 8e3c72158fc0..cc98e55dc695 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -115,8 +115,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) { return X; } -static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { - Triple TheTriple(TT); +static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TheTriple) { bool is64Bit = TheTriple.getArch() == Triple::x86_64; MCAsmInfo *MAI; diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp index 6cf5af7217f9..a5aadd6a385e 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp @@ -39,33 +39,33 @@ public: MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName); // FIXME: check that the value is actually the same. if (!Sym->isVariable()) - Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx)); + Sym->setVariableValue(MCConstantExpr::create(SymAddr, Ctx)); const MCExpr *Expr = nullptr; switch(RelType) { case X86_64_RELOC_TLV: - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); break; case X86_64_RELOC_SIGNED_4: - Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), - MCConstantExpr::Create(4, Ctx), + Expr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Sym, Ctx), + MCConstantExpr::create(4, Ctx), Ctx); break; case X86_64_RELOC_SIGNED_2: - Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), - MCConstantExpr::Create(2, Ctx), + Expr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Sym, Ctx), + MCConstantExpr::create(2, Ctx), Ctx); break; case X86_64_RELOC_SIGNED_1: - Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx), - MCConstantExpr::Create(1, Ctx), + Expr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Sym, Ctx), + MCConstantExpr::create(1, Ctx), Ctx); break; case X86_64_RELOC_GOT_LOAD: - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx); break; case X86_64_RELOC_GOT: - Expr = MCSymbolRefExpr::Create(Sym, isPCRel ? + Expr = MCSymbolRefExpr::create(Sym, isPCRel ? 
MCSymbolRefExpr::VK_GOTPCREL : MCSymbolRefExpr::VK_GOT, Ctx); @@ -84,7 +84,7 @@ public: report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " "X86_64_RELOC_SUBTRACTOR."); - const MCExpr *LHS = MCSymbolRefExpr::Create(Sym, Ctx); + const MCExpr *LHS = MCSymbolRefExpr::create(Sym, Ctx); symbol_iterator RSymI = Rel.getSymbol(); uint64_t RSymAddr; @@ -94,15 +94,15 @@ public: MCSymbol *RSym = Ctx.getOrCreateSymbol(RSymName); if (!RSym->isVariable()) - RSym->setVariableValue(MCConstantExpr::Create(RSymAddr, Ctx)); + RSym->setVariableValue(MCConstantExpr::create(RSymAddr, Ctx)); - const MCExpr *RHS = MCSymbolRefExpr::Create(RSym, Ctx); + const MCExpr *RHS = MCSymbolRefExpr::create(RSym, Ctx); - Expr = MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + Expr = MCBinaryExpr::createSub(LHS, RHS, Ctx); break; } default: - Expr = MCSymbolRefExpr::Create(Sym, Ctx); + Expr = MCSymbolRefExpr::create(Sym, Ctx); break; } return Expr; diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 9da3e1fc36bf..95acc07192da 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -25,7 +25,7 @@ using namespace llvm; namespace { class X86MachObjectWriter : public MCMachObjectTargetWriter { - bool RecordScatteredRelocation(MachObjectWriter *Writer, + bool recordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -33,7 +33,7 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter { MCValue Target, unsigned Log2Size, uint64_t &FixedValue); - void RecordTLVPRelocation(MachObjectWriter *Writer, + void recordTLVPRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -54,12 +54,10 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter { MCValue Target, uint64_t &FixedValue); public: - X86MachObjectWriter(bool Is64Bit, uint32_t CPUType, - uint32_t CPUSubtype) - : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, - /*UseAggressiveSymbolFolding=*/Is64Bit) {} + X86MachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {} - void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override { @@ -142,13 +140,11 @@ void X86MachObjectWriter::RecordX86_64Relocation( const MCSymbol *A = &Target.getSymA()->getSymbol(); if (A->isTemporary()) A = &Writer->findAliasedSymbol(*A); - const MCSymbolData &A_SD = Asm.getSymbolData(*A); const MCSymbol *A_Base = Asm.getAtom(*A); const MCSymbol *B = &Target.getSymB()->getSymbol(); if (B->isTemporary()) B = &Writer->findAliasedSymbol(*B); - const MCSymbolData &B_SD = Asm.getSymbolData(*B); const MCSymbol *B_Base = Asm.getAtom(*B); // Neither symbol can be modified. @@ -190,7 +186,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( (!B_Base ? 
0 : Writer->getSymbolAddress(*B_Base, Layout)); if (!A_Base) - Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; + Index = A->getFragment()->getParent()->getOrdinal() + 1; Type = MachO::X86_64_RELOC_UNSIGNED; MachO::any_relocation_info MRE; @@ -202,7 +198,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( if (B_Base) RelSymbol = B_Base; else - Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; + Index = B->getFragment()->getParent()->getOrdinal() + 1; Type = MachO::X86_64_RELOC_SUBTRACTOR; } else { const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); @@ -211,7 +207,6 @@ void X86MachObjectWriter::RecordX86_64Relocation( if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec)) Asm.addLocalUsedInReloc(*Symbol); } - const MCSymbolData &SD = Asm.getSymbolData(*Symbol); RelSymbol = Asm.getAtom(*Symbol); // Relocations inside debug sections always use local relocations when @@ -235,7 +230,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( Layout.getSymbolOffset(*RelSymbol); } else if (Symbol->isInSection() && !Symbol->isVariable()) { // The index is the section ordinal (1-based). - Index = SD.getFragment()->getParent()->getOrdinal() + 1; + Index = Symbol->getFragment()->getParent()->getOrdinal() + 1; Value += Writer->getSymbolAddress(*Symbol, Layout); if (IsPCRel) @@ -243,7 +238,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( } else if (Symbol->isVariable()) { const MCExpr *Value = Symbol->getVariableValue(); int64_t Res; - bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, + bool isAbs = Value->evaluateAsAbsolute(Res, Layout, Writer->getSectionAddressMap()); if (isAbs) { FixedValue = Res; @@ -339,7 +334,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, +bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -354,23 +349,21 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, // See <reloc.h>. const MCSymbol *A = &Target.getSymA()->getSymbol(); - const MCSymbolData *A_SD = &Asm.getSymbolData(*A); - if (!A_SD->getFragment()) + if (!A->getFragment()) report_fatal_error("symbol '" + A->getName() + "' can not be undefined in a subtraction expression", false); uint32_t Value = Writer->getSymbolAddress(*A, Layout); - uint64_t SecAddr = - Writer->getSectionAddress(A_SD->getFragment()->getParent()); + uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); FixedValue += SecAddr; uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { - const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbol *SB = &B->getSymbol(); - if (!B_SD->getFragment()) + if (!SB->getFragment()) report_fatal_error("symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression", false); @@ -380,10 +373,10 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, // Note that there is no longer any semantic difference between these two // relocation types from the linkers point of view, this is done solely for // pedantic compatibility with 'as'. - Type = A_SD->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF : - (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF; + Type = A->isExternal() ? 
(unsigned)MachO::GENERIC_RELOC_SECTDIFF + : (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF; Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); - FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } // Relocations are written out in reverse order, so the PAIR comes first. @@ -435,7 +428,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, return true; } -void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, +void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -490,7 +483,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // If this is a 32-bit TLVP reloc it's handled a bit differently. if (Target.getSymA() && Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { - RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + recordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); return; } @@ -499,7 +492,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // scattered relocation entry. Differences always require scattered // relocations. if (Target.getSymB()) { - RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, Log2Size, FixedValue); return; } @@ -515,10 +508,10 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, if (IsPCRel) Offset += 1 << Log2Size; // Try to record the scattered relocation if needed. Fall back to non - // scattered if necessary (see comments in RecordScatteredRelocation() + // scattered if necessary (see comments in recordScatteredRelocation() // for details). if (Offset && A && !Writer->doesSymbolRequireExternRelocation(*A) && - RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, Log2Size, FixedValue)) return; @@ -538,7 +531,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // Resolve constant variables. 
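// [Illustrative aside, not part of the imported diff] The "resolve constant
// variables" step below folds a variable symbol through the renamed
// evaluateAsAbsolute() (this import lowercases the old EvaluateAsAbsolute).
// A minimal sketch of that fold, assuming LLVM's MC headers are available;
// tryFoldVariable is a hypothetical helper, not an LLVM API:
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include <cstdint>

// Returns true and sets Res when Sym is a variable whose value folds to a
// compile-time constant under the given layout.
static bool tryFoldVariable(const llvm::MCSymbol &Sym,
                            const llvm::MCAsmLayout &Layout, int64_t &Res) {
  if (!Sym.isVariable())
    return false;
  return Sym.getVariableValue()->evaluateAsAbsolute(Res, Layout);
}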
if (A->isVariable()) { int64_t Res; - if (A->getVariableValue()->EvaluateAsAbsolute( + if (A->getVariableValue()->evaluateAsAbsolute( Res, Layout, Writer->getSectionAddressMap())) { FixedValue = Res; return; diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index c70e2e954631..852267400bba 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -168,6 +168,8 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", "Support RDSEED instruction">; +def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", + "Support MPX instructions">; def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb", @@ -188,10 +190,6 @@ def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", "LEA instruction with certain arguments is slow">; def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", "INC and DEC instructions are slower than ADD and SUB">; -def FeatureUseSqrtEst : SubtargetFeature<"use-sqrt-est", "UseSqrtEst", "true", - "Use RSQRT* to optimize square root calculations">; -def FeatureUseRecipEst : SubtargetFeature<"use-recip-est", "UseReciprocalEst", - "true", "Use RCP* to optimize division calculations">; def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", "Use software floating point features.">; @@ -380,7 +378,7 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, - FeatureSlowIncDec]>; + FeatureSlowIncDec, FeatureMPX]>; def : KnightsLandingProc<"knl">; // FIXME: define SKX model @@ -391,7 +389,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, - FeatureSlowIncDec]>; + FeatureSlowIncDec, FeatureMPX]>; def : SkylakeProc<"skylake">; def : SkylakeProc<"skx">; // Legacy alias. @@ -444,7 +442,7 @@ def : ProcessorModel<"btver2", BtVer2Model, FeaturePRFCHW, FeatureAES, FeaturePCLMUL, FeatureBMI, FeatureF16C, FeatureMOVBE, FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem, - FeatureSlowSHLD, FeatureUseSqrtEst, FeatureUseRecipEst]>; + FeatureSlowSHLD]>; // TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips. diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index f97557e5c609..64fc6d0d7e5c 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -78,7 +78,7 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO, switch (MO.getType()) { default: llvm_unreachable("unknown symbol type!"); case MachineOperand::MO_ConstantPoolIndex: - O << *P.GetCPISymbol(MO.getIndex()); + P.GetCPISymbol(MO.getIndex())->print(O, P.MAI); P.printOffset(MO.getOffset(), O); break; case MachineOperand::MO_GlobalAddress: { @@ -127,9 +127,12 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO, // If the name begins with a dollar-sign, enclose it in parens. We do this // to avoid having it look like an integer immediate to the assembler. 
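// [Illustrative aside, not part of the imported diff] A self-contained model
// of the quoting rule described in the comment above: a symbol name that
// begins with '$' is wrapped in parentheses so the assembler cannot read it
// as an integer immediate. This mirrors the hunk that follows.
#include <iostream>
#include <string>

static void printGVSymName(std::ostream &O, const std::string &Name) {
  if (!Name.empty() && Name[0] != '$')
    O << Name;
  else
    O << '(' << Name << ')';
}

int main() {
  printGVSymName(std::cout, "foo");  // prints: foo
  std::cout << ' ';
  printGVSymName(std::cout, "$tmp"); // prints: ($tmp)
  std::cout << '\n';
}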
if (GVSym->getName()[0] != '$') - O << *GVSym; - else - O << '(' << *GVSym << ')'; + GVSym->print(O, P.MAI); + else { + O << '('; + GVSym->print(O, P.MAI); + O << ')'; + } P.printOffset(MO.getOffset(), O); break; } @@ -146,12 +149,15 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO, // These affect the name of the symbol, not any suffix. break; case X86II::MO_GOT_ABSOLUTE_ADDRESS: - O << " + [.-" << *P.MF->getPICBaseSymbol() << ']'; + O << " + [.-"; + P.MF->getPICBaseSymbol()->print(O, P.MAI); + O << ']'; break; case X86II::MO_PIC_BASE_OFFSET: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - O << '-' << *P.MF->getPICBaseSymbol(); + O << '-'; + P.MF->getPICBaseSymbol()->print(O, P.MAI); break; case X86II::MO_TLSGD: O << "@TLSGD"; break; case X86II::MO_TLSLD: O << "@TLSLD"; break; @@ -168,7 +174,8 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO, case X86II::MO_PLT: O << "@PLT"; break; case X86II::MO_TLVP: O << "@TLVP"; break; case X86II::MO_TLVP_PIC_BASE: - O << "@TLVP" << '-' << *P.MF->getPICBaseSymbol(); + O << "@TLVP" << '-'; + P.MF->getPICBaseSymbol()->print(O, P.MAI); break; case X86II::MO_SECREL: O << "@SECREL32"; break; } @@ -525,7 +532,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { // register any SEH handlers, so its object files should be safe. OutStreamer->EmitSymbolAttribute(S, MCSA_Global); OutStreamer->EmitAssignment( - S, MCConstantExpr::Create(int64_t(1), MMI->getContext())); + S, MCConstantExpr::create(int64_t(1), MMI->getContext())); } } } @@ -549,7 +556,7 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel, // using NLPs; however, sometimes the types are local to the file. // We need to fill in the value for the NLP in those cases. OutStreamer.EmitValue( - MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()), + MCSymbolRefExpr::create(MCSym.getPointer(), OutStreamer.getContext()), 4 /*size*/); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9af0aebea232..3dc75d76cee3 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -3530,9 +3530,9 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, SmallVector<MachineOperand, 8> AddrOps; AM.getFullAddress(AddrOps); - MachineInstr *Result = - XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, - Size, Alignment, /*AllowCommute=*/true); + MachineInstr *Result = XII.foldMemoryOperandImpl( + *FuncInfo.MF, MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment, + /*AllowCommute=*/true); if (!Result) return false; @@ -3541,20 +3541,21 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, // to just look at OpNo + the offset to the index reg. We actually need to // scan the instruction to find the index reg and see if its the correct reg // class. - for (MIOperands MO(Result); MO.isValid(); ++MO) { - if (!MO->isReg() || MO->isDef() || MO->getReg() != AM.IndexReg) + unsigned OperandNo = 0; + for (MachineInstr::mop_iterator I = Result->operands_begin(), + E = Result->operands_end(); I != E; ++I, ++OperandNo) { + MachineOperand &MO = *I; + if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg) continue; // Found the index reg, now try to rewrite it. 
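// [Illustrative aside, not part of the imported diff] The rewritten FastISel
// loop above tracks the operand index by hand, since the removed MIOperands
// iterator used to supply getOperandNo(). A self-contained model of the same
// explicit-index pattern:
#include <cstdio>
#include <vector>

struct Op { bool IsReg, IsDef; unsigned Reg; };

int main() {
  std::vector<Op> Ops = {{true, true, 3}, {false, false, 0}, {true, false, 7}};
  const unsigned IndexReg = 7, ConstrainedReg = 9;
  unsigned OperandNo = 0;
  for (auto I = Ops.begin(), E = Ops.end(); I != E; ++I, ++OperandNo) {
    Op &MO = *I;
    if (!MO.IsReg || MO.IsDef || MO.Reg != IndexReg)
      continue;
    // OperandNo is still in scope here, so the use can be rewritten in place.
    MO.Reg = ConstrainedReg;
    std::printf("rewrote operand %u\n", OperandNo); // prints: rewrote operand 2
  }
}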
- unsigned OpNo = MO.getOperandNo(); unsigned IndexReg = constrainOperandRegClass(Result->getDesc(), - MO->getReg(), OpNo); - if (IndexReg == MO->getReg()) + MO.getReg(), OperandNo); + if (IndexReg == MO.getReg()) continue; - MO->setReg(IndexReg); + MO.setReg(IndexReg); } Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI)); - FuncInfo.MBB->insert(FuncInfo.InsertPt, Result); MI->eraseFromParent(); return true; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3ba811574489..e3ec288a683e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -67,12 +67,6 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization( "rather than promotion."), cl::Hidden); -static cl::opt<int> ReciprocalEstimateRefinementSteps( - "x86-recip-refinement-steps", cl::init(1), - cl::desc("Specify the number of Newton-Raphson iterations applied to the " - "result of the hardware reciprocal estimate instruction."), - cl::NotHidden); - // Forward declarations. static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, SDValue V2); @@ -842,13 +836,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); - // Only provide customized ctpop vector bit twiddling for vector types we - // know to perform better than using the popcnt instructions on each vector - // element. If popcnt isn't supported, always provide the custom version. - if (!Subtarget->hasPOPCNT()) { - setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); - setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); - } + setOperationAction(ISD::CTPOP, MVT::v16i8, Custom); + setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); + setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); + setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { @@ -1113,6 +1104,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); + setOperationAction(ISD::CTPOP, MVT::v32i8, Custom); + setOperationAction(ISD::CTPOP, MVT::v16i16, Custom); + setOperationAction(ISD::CTPOP, MVT::v8i32, Custom); + setOperationAction(ISD::CTPOP, MVT::v4i64, Custom); + if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); setOperationAction(ISD::FMA, MVT::v4f64, Legal); @@ -1147,16 +1143,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // when we have a 256bit-wide blend with immediate. setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); - // Only provide customized ctpop vector bit twiddling for vector types we - // know to perform better than using the popcnt instructions on each - // vector element. If popcnt isn't supported, always provide the custom - // version. 
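// [Illustrative aside, not part of the imported diff] The hunks above mark
// vector CTPOP as Custom for all 128-bit (and, further down, 256-bit)
// integer types instead of only when POPCNT is missing. The expansion body
// is outside this hunk; the classic technique such a lowering uses is a
// per-nibble table lookup (done with PSHUFB on vectors), modeled here on a
// scalar byte:
#include <cassert>
#include <cstdint>

static uint8_t popcntByte(uint8_t B) {
  // Popcount of every 4-bit value; in a vector lowering this table is a
  // v16i8 constant indexed via PSHUFB.
  static const uint8_t LUT[16] = {0, 1, 1, 2, 1, 2, 2, 3,
                                  1, 2, 2, 3, 2, 3, 3, 4};
  return LUT[B & 0xF] + LUT[B >> 4];
}

int main() {
  assert(popcntByte(0x00) == 0);
  assert(popcntByte(0xA5) == 4);
  assert(popcntByte(0xFF) == 8);
}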
- if (!Subtarget->hasPOPCNT()) - setOperationAction(ISD::CTPOP, MVT::v4i64, Custom); - - // Custom CTPOP always performs better on natively supported v8i32 - setOperationAction(ISD::CTPOP, MVT::v8i32, Custom); - // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal); @@ -1273,7 +1259,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal); - + setOperationAction(ISD::BR_CC, MVT::i1, Expand); setOperationAction(ISD::SETCC, MVT::i1, Custom); setOperationAction(ISD::XOR, MVT::i1, Legal); @@ -1842,7 +1828,7 @@ X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, Subtarget->isPICStyleGOT()); // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF // entries. - return MCSymbolRefExpr::Create(MBB->getSymbol(), + return MCSymbolRefExpr::create(MBB->getSymbol(), MCSymbolRefExpr::VK_GOTOFF, Ctx); } @@ -1866,7 +1852,7 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); // Otherwise, the reference is relative to the PIC base. - return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx); + return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); } std::pair<const TargetRegisterClass *, uint8_t> @@ -1981,7 +1967,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); } else if (VA.getLocInfo() == CCValAssign::BCvt) - ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy); + ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy); assert(VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."); @@ -2018,13 +2004,13 @@ X86TargetLowering::LowerReturn(SDValue Chain, if (Subtarget->is64Bit()) { if (ValVT == MVT::x86mmx) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { - ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy); + ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy); ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. if (!Subtarget->hasSSE2()) - ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy); + ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy); } } } @@ -2451,7 +2437,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::BCvt) - ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); + ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue); if (VA.isExtInLoc()) { // Handle MMX values passed in XMM regs. @@ -2780,6 +2766,19 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (MF.getTarget().Options.DisableTailCalls) isTailCall = false; + if (Subtarget->isPICStyleGOT() && + !MF.getTarget().Options.GuaranteedTailCallOpt) { + // If we are using a GOT, disable tail calls to external symbols with + // default visibility. Tail calling such a symbol requires using a GOT + // relocation, which forces early binding of the symbol. This breaks code + // that require lazy function symbol resolution. 
Using musttail or + // GuaranteedTailCallOpt will override this. + GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); + if (!G || (!G->getGlobal()->hasLocalLinkage() && + G->getGlobal()->hasDefaultVisibility())) + isTailCall = false; + } + bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall(); if (IsMustTail) { // Force this to be a tail call. The verifier rules are enough to ensure @@ -2898,14 +2897,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); else if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers. - Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); + Arg = DAG.getBitcast(MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); } else Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); break; case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg); + Arg = DAG.getBitcast(RegVT, Arg); break; case CCValAssign::Indirect: { // Store the argument. @@ -2964,8 +2963,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Note: The actual moving to ECX is done further down. GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); - if (G && !G->getGlobal()->hasHiddenVisibility() && - !G->getGlobal()->hasProtectedVisibility()) + if (G && !G->getGlobal()->hasLocalLinkage() && + G->getGlobal()->hasDefaultVisibility()) Callee = LowerGlobalAddress(Callee, DAG); else if (isa<ExternalSymbolSDNode>(Callee)) Callee = LowerExternalSymbol(Callee, DAG); @@ -4073,7 +4072,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, } else llvm_unreachable("Unexpected vector type"); - return DAG.getNode(ISD::BITCAST, dl, VT, Vec); + return DAG.getBitcast(VT, Vec); } static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal, @@ -4200,9 +4199,9 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32; SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8); - Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256); + Vec256 = DAG.getBitcast(CastVT, Vec256); Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask); - return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256); + return DAG.getBitcast(ResultVT, Vec256); } return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128); @@ -4255,7 +4254,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG, } else llvm_unreachable("Unexpected vector type"); - return DAG.getNode(ISD::BITCAST, dl, VT, Vec); + return DAG.getBitcast(VT, Vec); } /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd @@ -4611,7 +4610,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, } } - return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V); + return DAG.getBitcast(MVT::v16i8, V); } /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. @@ -4749,7 +4748,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, DAG.getIntPtrConstant(InsertPSMask, DL)); - return DAG.getNode(ISD::BITCAST, DL, VT, Result); + return DAG.getBitcast(VT, Result); } /// Return a vector logical shift node. @@ -4759,12 +4758,11 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, assert(VT.is128BitVector() && "Unknown type for VShift"); MVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? 
X86ISD::VSHLDQ : X86ISD::VSRLDQ; - SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); + SrcOp = DAG.getBitcast(ShVT, SrcOp); MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType()); assert(NumBits % 8 == 0 && "Only support byte sized shifts"); SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy); - return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); + return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); } static SDValue @@ -4949,7 +4947,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, SDValue(ResNode.getNode(), 1)); } - return DAG.getNode(ISD::BITCAST, DL, VT, ResNode); + return DAG.getBitcast(VT, ResNode); } return SDValue(); } @@ -5261,8 +5259,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { SDValue Imm = ConvertI1VectorToInterger(Op, DAG); if (Imm.getValueSizeInBits() == VT.getSizeInBits()) - return DAG.getNode(ISD::BITCAST, dl, VT, Imm); - SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm); + return DAG.getBitcast(VT, Imm); + SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, DAG.getIntPtrConstant(0, dl)); } @@ -5277,7 +5275,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { SDValue In = Op.getOperand(idx); if (In.getOpcode() == ISD::UNDEF) continue; - if (!isa<ConstantSDNode>(In)) + if (!isa<ConstantSDNode>(In)) NonConstIdx.push_back(idx); else { Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx; @@ -5304,12 +5302,12 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { } else if (HasConstElts) Imm = DAG.getConstant(0, dl, VT); - else + else Imm = DAG.getUNDEF(VT); if (Imm.getValueSizeInBits() == VT.getSizeInBits()) - DstVec = DAG.getNode(ISD::BITCAST, dl, VT, Imm); + DstVec = DAG.getBitcast(VT, Imm); else { - SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm); + SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, DAG.getIntPtrConstant(0, dl)); } @@ -5818,9 +5816,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // convert it to a vector with movd (S2V+shuffle to zero extend). Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item); - return DAG.getNode( - ISD::BITCAST, dl, VT, - getShuffleVectorZeroOrUndef(Item, Idx * 2, true, Subtarget, DAG)); + return DAG.getBitcast(VT, getShuffleVectorZeroOrUndef( + Item, Idx * 2, true, Subtarget, DAG)); } } @@ -5866,7 +5863,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } - return DAG.getNode(ISD::BITCAST, dl, VT, Item); + return DAG.getBitcast(VT, Item); } } @@ -6257,6 +6254,42 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask, return true; } +/// \brief Test whether a shuffle mask is equivalent within each 256-bit lane. +/// +/// This checks a shuffle mask to see if it is performing the same +/// 256-bit lane-relative shuffle in each 256-bit lane. This trivially implies +/// that it is also not lane-crossing. It may however involve a blend from the +/// same lane of a second vector. 
+/// +/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is +/// non-trivial to compute in the face of undef lanes. The representation is +/// *not* suitable for use with existing 256-bit shuffles as it will contain +/// entries from both V1 and V2 inputs to the wider mask. +static bool +is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask, + SmallVectorImpl<int> &RepeatedMask) { + int LaneSize = 256 / VT.getScalarSizeInBits(); + RepeatedMask.resize(LaneSize, -1); + int Size = Mask.size(); + for (int i = 0; i < Size; ++i) { + if (Mask[i] < 0) + continue; + if ((Mask[i] % Size) / LaneSize != i / LaneSize) + // This entry crosses lanes, so there is no way to model this shuffle. + return false; + + // Ok, handle the in-lane shuffles by detecting if and when they repeat. + if (RepeatedMask[i % LaneSize] == -1) + // This is the first non-undef entry in this slot of a 256-bit lane. + RepeatedMask[i % LaneSize] = + Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size; + else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i]) + // Found a mismatch with the repeated mask. + return false; + } + return true; +} + /// \brief Checks whether a shuffle mask is equivalent to an explicit list of /// arguments. /// @@ -6316,6 +6349,22 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL, return DAG.getConstant(Imm, DL, MVT::i8); } +/// \brief Get a 8-bit shuffle, 1 bit per lane, immediate for a mask. +/// +/// This helper function produces an 8-bit shuffle immediate corresponding to +/// the ubiquitous shuffle encoding scheme used in x86 instructions for +/// shuffling 8 lanes. +static SDValue get1bitLaneShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL, + SelectionDAG &DAG) { + assert(Mask.size() <= 8 && + "Up to 8 elts may be in Imm8 1-bit lane shuffle mask"); + unsigned Imm = 0; + for (unsigned i = 0; i < Mask.size(); ++i) + if (Mask[i] >= 0) + Imm |= (Mask[i] % 2) << i; + return DAG.getConstant(Imm, DL, MVT::i8); +} + /// \brief Try to emit a blend instruction for a shuffle using bit math. /// /// This is used as a fallback approach when first class blend instructions are @@ -6341,10 +6390,9 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1, V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask); // We have to cast V2 around. MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64); - V2 = DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::ANDNP, DL, MaskVT, - DAG.getNode(ISD::BITCAST, DL, MaskVT, V1Mask), - DAG.getNode(ISD::BITCAST, DL, MaskVT, V2))); + V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::ANDNP, DL, MaskVT, + DAG.getBitcast(MaskVT, V1Mask), + DAG.getBitcast(MaskVT, V2))); return DAG.getNode(ISD::OR, DL, VT, V1, V2); } @@ -6395,11 +6443,11 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, BlendMask |= 1u << (i * Scale + j); MVT BlendVT = VT.getSizeInBits() > 128 ? 
MVT::v8i32 : MVT::v4i32; - V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2, - DAG.getConstant(BlendMask, DL, MVT::i8))); + V1 = DAG.getBitcast(BlendVT, V1); + V2 = DAG.getBitcast(BlendVT, V2); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2, + DAG.getConstant(BlendMask, DL, MVT::i8))); } // FALLTHROUGH case MVT::v8i16: { @@ -6412,11 +6460,11 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, for (int j = 0; j < Scale; ++j) BlendMask |= 1u << (i * Scale + j); - V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2, - DAG.getConstant(BlendMask, DL, MVT::i8))); + V1 = DAG.getBitcast(MVT::v8i16, V1); + V2 = DAG.getBitcast(MVT::v8i16, V2); + return DAG.getBitcast(VT, + DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2, + DAG.getConstant(BlendMask, DL, MVT::i8))); } case MVT::v16i16: { @@ -6465,13 +6513,12 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL, MVT::i8)); - V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2); - return DAG.getNode( - ISD::BITCAST, DL, VT, - DAG.getNode(ISD::VSELECT, DL, BlendVT, - DAG.getNode(ISD::BUILD_VECTOR, DL, BlendVT, VSELECTMask), - V1, V2)); + V1 = DAG.getBitcast(BlendVT, V1); + V2 = DAG.getBitcast(BlendVT, V2); + return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, DL, BlendVT, + DAG.getNode(ISD::BUILD_VECTOR, DL, + BlendVT, VSELECTMask), + V1, V2)); } default: @@ -6652,13 +6699,12 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, if (Subtarget->hasSSSE3()) { // Cast the inputs to i8 vector of correct length to match PALIGNR. MVT AlignVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes); - Lo = DAG.getNode(ISD::BITCAST, DL, AlignVT, Lo); - Hi = DAG.getNode(ISD::BITCAST, DL, AlignVT, Hi); + Lo = DAG.getBitcast(AlignVT, Lo); + Hi = DAG.getBitcast(AlignVT, Hi); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo, - DAG.getConstant(Rotation * Scale, DL, - MVT::i8))); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo, + DAG.getConstant(Rotation * Scale, DL, MVT::i8))); } assert(VT.getSizeInBits() == 128 && @@ -6671,15 +6717,15 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, int HiByteShift = Rotation * Scale; // Cast the inputs to v2i64 to match PSLLDQ/PSRLDQ. - Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Lo); - Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Hi); + Lo = DAG.getBitcast(MVT::v2i64, Lo); + Hi = DAG.getBitcast(MVT::v2i64, Hi); SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo, DAG.getConstant(LoByteShift, DL, MVT::i8)); SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi, DAG.getConstant(HiByteShift, DL, MVT::i8)); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift)); + return DAG.getBitcast(VT, + DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift)); } /// \brief Compute whether each element of a shuffle is zeroable. 
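// [Illustrative aside, not part of the imported diff] The blend lowering
// above widens a coarse shuffle mask into a BLENDI immediate by replicating
// one bit per original element Scale times. A worked check, assuming the
// usual LLVM convention that mask entries >= Size select from V2:
#include <cassert>

int main() {
  const int Size = 2, Scale = 2;   // a v2i64 blend performed as v4i32 BLENDI
  int Mask[2] = {0, 3};            // element 0 from V1, element 1 from V2
  unsigned BlendMask = 0;
  for (int i = 0; i < Size; ++i)
    if (Mask[i] >= Size)           // entry selects from V2
      for (int j = 0; j < Scale; ++j)
        BlendMask |= 1u << (i * Scale + j);
  assert(BlendMask == 0xC);        // bits 2 and 3: both dwords of element 1
}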
@@ -6740,8 +6786,8 @@ static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1, SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL, IntEltVT); if (EltVT.isFloatingPoint()) { - Zero = DAG.getNode(ISD::BITCAST, DL, EltVT, Zero); - AllOnes = DAG.getNode(ISD::BITCAST, DL, EltVT, AllOnes); + Zero = DAG.getBitcast(EltVT, Zero); + AllOnes = DAG.getBitcast(EltVT, AllOnes); } SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero); SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); @@ -6833,11 +6879,11 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1, MVT ShiftVT = MVT::getVectorVT(ShiftSVT, Size / Scale); assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) && "Illegal integer vector type"); - V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V); + V = DAG.getBitcast(ShiftVT, V); V = DAG.getNode(OpCode, DL, ShiftVT, V, DAG.getConstant(ShiftAmt, DL, MVT::i8)); - return DAG.getNode(ISD::BITCAST, DL, VT, V); + return DAG.getBitcast(VT, V); }; // SSE/AVX supports logical shifts up to 64-bit integers - so we can just @@ -6878,31 +6924,28 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( if (Subtarget->hasSSE41()) { MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale), NumElements / Scale); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV)); + return DAG.getBitcast(VT, DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV)); } // For any extends we can cheat for larger element sizes and use shuffle // instructions that can fold with a load and/or copy. if (AnyExt && EltBits == 32) { int PSHUFDMask[4] = {0, -1, 1, -1}; - return DAG.getNode( - ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, - DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV), - getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, + DAG.getBitcast(MVT::v4i32, InputV), + getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); } if (AnyExt && EltBits == 16 && Scale > 2) { int PSHUFDMask[4] = {0, -1, 0, -1}; InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, - DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV), + DAG.getBitcast(MVT::v4i32, InputV), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)); int PSHUFHWMask[4] = {1, -1, -1, -1}; - return DAG.getNode( - ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, InputV), - getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG))); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, + DAG.getBitcast(MVT::v8i16, InputV), + getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG))); } // If this would require more than 2 unpack instructions to expand, use @@ -6914,11 +6957,11 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( for (int i = 0; i < 16; ++i) PSHUFBMask[i] = DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, DL, MVT::i8); - InputV = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, InputV); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV, - DAG.getNode(ISD::BUILD_VECTOR, DL, - MVT::v16i8, PSHUFBMask))); + InputV = DAG.getBitcast(MVT::v16i8, InputV); + return DAG.getBitcast(VT, + DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV, + DAG.getNode(ISD::BUILD_VECTOR, DL, + MVT::v16i8, PSHUFBMask))); } // Otherwise emit a sequence of unpacks. 
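// [Illustrative aside, not part of the imported diff] The sequence-of-unpacks
// loop below relies on UNPCKL with a zero vector: interleaving the low half
// of the input with zero lanes doubles the element width, which is exactly a
// zero-extension when the lanes are read back at twice the size. A scalar
// model of one such step:
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint16_t> unpcklWithZero(const std::vector<uint8_t> &V) {
  // Each low-half byte is paired with a zero byte; as little-endian i16
  // lanes that pairing is zext(V[i]).
  std::vector<uint16_t> R;
  for (std::size_t i = 0; i < V.size() / 2; ++i)
    R.push_back(static_cast<uint16_t>(V[i]));
  return R;
}

int main() {
  std::vector<uint8_t> In = {0x11, 0x22, 0x33, 0x44};
  auto Out = unpcklWithZero(In);
  assert(Out.size() == 2 && Out[0] == 0x0011 && Out[1] == 0x0022);
}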
@@ -6926,13 +6969,13 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements); SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT) : getZeroVector(InputVT, Subtarget, DAG, DL); - InputV = DAG.getNode(ISD::BITCAST, DL, InputVT, InputV); + InputV = DAG.getBitcast(InputVT, InputV); InputV = DAG.getNode(X86ISD::UNPCKL, DL, InputVT, InputV, Ext); Scale /= 2; EltBits *= 2; NumElements /= 2; } while (Scale > 1); - return DAG.getNode(ISD::BITCAST, DL, VT, InputV); + return DAG.getBitcast(VT, InputV); } /// \brief Try to lower a vector shuffle as a zero extension on any microarch. @@ -7030,9 +7073,9 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( }; if (SDValue V = CanZExtLowHalf()) { - V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V); + V = DAG.getBitcast(MVT::v2i64, V); V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V); - return DAG.getNode(ISD::BITCAST, DL, VT, V); + return DAG.getBitcast(VT, V); } // No viable ext lowering found. @@ -7106,7 +7149,7 @@ static SDValue lowerVectorShuffleAsElementInsertion( if (SDValue V2S = getScalarValueForVectorElement( V2, Mask[V2Index] - Mask.size(), DAG)) { // We need to zext the scalar if it is smaller than an i32. - V2S = DAG.getNode(ISD::BITCAST, DL, EltVT, V2S); + V2S = DAG.getBitcast(EltVT, V2S); if (EltVT == MVT::i8 || EltVT == MVT::i16) { // Using zext to expand a narrow element won't work for non-zero // insertions. @@ -7155,7 +7198,7 @@ static SDValue lowerVectorShuffleAsElementInsertion( V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2); if (ExtVT != VT) - V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2); + V2 = DAG.getBitcast(VT, V2); if (V2Index != 0) { // If we have 4 or fewer lanes we can cheaply shuffle the element into @@ -7167,13 +7210,13 @@ static SDValue lowerVectorShuffleAsElementInsertion( V2Shuffle[V2Index] = 0; V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle); } else { - V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V2); + V2 = DAG.getBitcast(MVT::v2i64, V2); V2 = DAG.getNode( X86ISD::VSHLDQ, DL, MVT::v2i64, V2, DAG.getConstant( V2Index * EltVT.getSizeInBits()/8, DL, DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64))); - V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2); + V2 = DAG.getBitcast(VT, V2); } } return V2; @@ -7396,13 +7439,13 @@ static SDValue lowerVectorShuffleAsUnpack(SDLoc DL, MVT VT, SDValue V1, V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); // Cast the inputs to the type we will use to unpack them. - V1 = DAG.getNode(ISD::BITCAST, DL, UnpackVT, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, UnpackVT, V2); + V1 = DAG.getBitcast(UnpackVT, V1); + V2 = DAG.getBitcast(UnpackVT, V2); // Unpack the inputs and cast the result back to the desired type. - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, - DL, UnpackVT, V1, V2)); + return DAG.getBitcast( + VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, + UnpackVT, V1, V2)); }; // We try each unpack from the largest to the smallest to try and find one @@ -7558,12 +7601,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // Straight shuffle of a single input vector. For everything from SSE2 // onward this has a single fast instruction with no scary immediates. // We have to map the mask as it is actually a v4i32 shuffle instruction. 
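// [Illustrative aside, not part of the imported diff] The v2i64 path below
// re-expresses a 64-bit element mask as a v4i32 PSHUFD mask: each 64-bit
// entry m becomes the dword pair {2m, 2m+1} (undef entries are clamped to 0
// first). A standalone check of that widening:
#include <algorithm>
#include <cassert>

int main() {
  int Mask[2] = {1, 0};  // v2i64: swap the two 64-bit elements
  int WidenedMask[4] = {std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1,
                        std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1};
  const int Expected[4] = {2, 3, 0, 1};  // dword pairs travel together
  for (int i = 0; i < 4; ++i)
    assert(WidenedMask[i] == Expected[i]);
}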
- V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V1); + V1 = DAG.getBitcast(MVT::v4i32, V1); int WidenedMask[4] = { std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1, std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1}; - return DAG.getNode( - ISD::BITCAST, DL, MVT::v2i64, + return DAG.getBitcast( + MVT::v2i64, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG))); } @@ -7584,12 +7627,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, }; if (SDValue V1Pack = GetPackNode(V1)) if (SDValue V2Pack = GetPackNode(V2)) - return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, - DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, - Mask[0] == 0 ? V1Pack.getOperand(0) - : V1Pack.getOperand(1), - Mask[1] == 2 ? V2Pack.getOperand(0) - : V2Pack.getOperand(1))); + return DAG.getBitcast(MVT::v2i64, + DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, + Mask[0] == 0 ? V1Pack.getOperand(0) + : V1Pack.getOperand(1), + Mask[1] == 2 ? V2Pack.getOperand(0) + : V2Pack.getOperand(1))); // Try to use shift instructions. if (SDValue Shift = @@ -7639,10 +7682,10 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // incur 2 cycles of stall for integer vectors on Nehalem and older chips. // However, all the alternatives are still more cycles and newer chips don't // have this problem. It would be really nice if x86 had better shuffles here. - V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V2); - return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, - DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask)); + V1 = DAG.getBitcast(MVT::v2f64, V1); + V2 = DAG.getBitcast(MVT::v2f64, V2); + return DAG.getBitcast(MVT::v2i64, + DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask)); } /// \brief Test whether this can be lowered with a single SHUFPS instruction. @@ -7941,11 +7984,10 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // up the inputs, bypassing domain shift penalties that we would encur if we // directly used PSHUFD on Nehalem and older. For newer chips, this isn't // relevant. - return DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, - DAG.getVectorShuffle( - MVT::v4f32, DL, - DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V1), - DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V2), Mask)); + return DAG.getBitcast( + MVT::v4i32, + DAG.getVectorShuffle(MVT::v4f32, DL, DAG.getBitcast(MVT::v4f32, V1), + DAG.getBitcast(MVT::v4f32, V2), Mask)); } /// \brief Lowering of single-input v8i16 shuffles is the cornerstone of SSE2 @@ -8123,11 +8165,10 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( int PSHUFDMask[] = {0, 1, 2, 3}; PSHUFDMask[ADWord] = BDWord; PSHUFDMask[BDWord] = ADWord; - V = DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, - DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V), - getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, - DAG))); + V = DAG.getBitcast( + VT, + DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V), + getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); // Adjust the mask to match the new locations of A and B. 
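// [Illustrative aside, not part of the imported diff] After the PSHUFD above
// swaps dwords ADWord and BDWord, every i16 mask entry pointing into either
// dword must be redirected to the other. The loop body falls outside this
// hunk's context; a standalone model, assuming LLVM's usual remapping:
#include <cassert>

int main() {
  const int ADWord = 0, BDWord = 2;       // the two dwords PSHUFD swapped
  int Mask[8] = {0, 1, 4, 5, 2, 3, 6, 7};
  for (int &M : Mask)
    if (M >= 0 && M / 2 == ADWord)
      M = 2 * BDWord + M % 2;             // A's halves now live in B's slot
    else if (M >= 0 && M / 2 == BDWord)
      M = 2 * ADWord + M % 2;             // and B's halves in A's slot
  assert(Mask[0] == 4 && Mask[1] == 5 && Mask[2] == 0 && Mask[3] == 1);
}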
for (int &M : Mask) @@ -8368,11 +8409,10 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG)); if (!isNoopShuffleMask(PSHUFDMask)) - V = DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, - DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V), - getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, - DAG))); + V = DAG.getBitcast( + VT, + DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V), + getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); // At this point, each half should contain all its inputs, and we can then // just shuffle them into their final position. @@ -8433,11 +8473,11 @@ static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1, if (V1InUse) V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, - DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, V1), + DAG.getBitcast(MVT::v16i8, V1), DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask)); if (V2InUse) V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, - DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, V2), + DAG.getBitcast(MVT::v16i8, V2), DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask)); // If we need shuffled inputs from both, blend the two. @@ -8448,7 +8488,7 @@ static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1, V = V1InUse ? V1 : V2; // Cast the result back to the correct type. - return DAG.getNode(ISD::BITCAST, DL, VT, V); + return DAG.getBitcast(VT, V); } /// \brief Generic lowering of 8-lane i16 shuffles. @@ -8749,10 +8789,9 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // Update the lane map based on the mapping we ended up with. LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2; } - V1 = DAG.getNode( - ISD::BITCAST, DL, MVT::v16i8, - DAG.getVectorShuffle(MVT::v8i16, DL, - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1), + V1 = DAG.getBitcast( + MVT::v16i8, + DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1), DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle)); // Unpack the bytes to form the i16s that will be shuffled into place. @@ -8770,10 +8809,9 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, assert(PostDupI16Shuffle[i / 2] == MappedMask && "Conflicting entrties in the original shuffle!"); } - return DAG.getNode( - ISD::BITCAST, DL, MVT::v16i8, - DAG.getVectorShuffle(MVT::v8i16, DL, - DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1), + return DAG.getBitcast( + MVT::v16i8, + DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1), DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle)); }; if (SDValue V = tryToWidenViaDuplication()) @@ -8866,19 +8904,18 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // We use the mask type to pick which bytes are preserved based on how many // elements are dropped. MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 }; - SDValue ByteClearMask = - DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, - DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1])); + SDValue ByteClearMask = DAG.getBitcast( + MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1])); V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask); if (!IsSingleInput) V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask); // Now pack things back together. - V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1); - V2 = IsSingleInput ? V1 : DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2); + V1 = DAG.getBitcast(MVT::v8i16, V1); + V2 = IsSingleInput ? 
V1 : DAG.getBitcast(MVT::v8i16, V2); SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, V2); for (int i = 1; i < NumEvenDrops; ++i) { - Result = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, Result); + Result = DAG.getBitcast(MVT::v8i16, Result); Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result); } @@ -8912,7 +8949,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, std::none_of(std::begin(HiBlendMask), std::end(HiBlendMask), [](int M) { return M >= 0 && M % 2 == 1; })) { // Use a mask to drop the high bytes. - VLoHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V); + VLoHalf = DAG.getBitcast(MVT::v8i16, V); VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf, DAG.getConstant(0x00FF, DL, MVT::v8i16)); @@ -8929,10 +8966,10 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, } else { // Otherwise just unpack the low half of V into VLoHalf and the high half into // VHiHalf so that we can blend them as i16s. - VLoHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); - VHiHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, - DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); + VLoHalf = DAG.getBitcast( + MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); + VHiHalf = DAG.getBitcast( + MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); } SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask); @@ -9073,8 +9110,8 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1, LoV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, LoOps); HiV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, HiOps); } - return std::make_pair(DAG.getNode(ISD::BITCAST, DL, SplitVT, LoV), - DAG.getNode(ISD::BITCAST, DL, SplitVT, HiV)); + return std::make_pair(DAG.getBitcast(SplitVT, LoV), + DAG.getBitcast(SplitVT, HiV)); }; SDValue LoV1, HiV1, LoV2, HiV2; @@ -9407,12 +9444,12 @@ static SDValue lowerVectorShuffleByMerging128BitLanes( LaneMask[2 * i + 1] = 2*Lanes[i] + 1; } - V1 = DAG.getNode(ISD::BITCAST, DL, LaneVT, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, LaneVT, V2); + V1 = DAG.getBitcast(LaneVT, V1); + V2 = DAG.getBitcast(LaneVT, V2); SDValue LaneShuffle = DAG.getVectorShuffle(LaneVT, DL, V1, V2, LaneMask); // Cast it back to the type we actually want. - LaneShuffle = DAG.getNode(ISD::BITCAST, DL, VT, LaneShuffle); + LaneShuffle = DAG.getBitcast(VT, LaneShuffle); // Now do a simple shuffle that isn't lane crossing. SmallVector<int, 8> NewMask; @@ -9441,6 +9478,37 @@ static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) { return true; } +static SDValue lowerVectorShuffleWithSHUFPD(SDLoc DL, MVT VT, + ArrayRef<int> Mask, SDValue V1, + SDValue V2, SelectionDAG &DAG) { + + // Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, .. + // Mask for V4F64; 0/1, 4/5, 2/3, 6/7.. 
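// [Illustrative aside, not part of the imported diff] A worked check of the
// SHUFPD pattern documented above, mirroring the detection loop that
// follows: position i may pick either element of the 64-bit pair based at
// (i & 6) in V1 (even i) or V2 (odd i), and the chosen low/high bit becomes
// bit i of the immediate.
#include <cassert>

int main() {
  const int NumElts = 4;
  int Mask[4] = {1, 5, 2, 7};              // a valid v4f64 SHUFPD mask
  bool ShufpdMask = true;
  unsigned Immediate = 0;
  for (int i = 0; i < NumElts; ++i) {
    int Val = (i & 6) + NumElts * (i & 1); // pair base: V1 even i, V2 odd i
    if (Mask[i] < Val || Mask[i] > Val + 1)
      ShufpdMask = false;
    Immediate |= (Mask[i] % 2) << i;
  }
  // 0xB matches what the old hard-coded v4f64 path computed for this mask.
  assert(ShufpdMask && Immediate == 0xB);
}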
+ assert(VT.getScalarSizeInBits() == 64 && "Unexpected data type for VSHUFPD"); + int NumElts = VT.getVectorNumElements(); + bool ShufpdMask = true; + bool CommutableMask = true; + unsigned Immediate = 0; + for (int i = 0; i < NumElts; ++i) { + if (Mask[i] < 0) + continue; + int Val = (i & 6) + NumElts * (i & 1); + int CommutVal = (i & 0xe) + NumElts * ((i & 1)^1); + if (Mask[i] < Val || Mask[i] > Val + 1) + ShufpdMask = false; + if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1) + CommutableMask = false; + Immediate |= (Mask[i] % 2) << i; + } + if (ShufpdMask) + return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, + DAG.getConstant(Immediate, DL, MVT::i8)); + if (CommutableMask) + return DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1, + DAG.getConstant(Immediate, DL, MVT::i8)); + return SDValue(); +} + /// \brief Handle lowering of 4-lane 64-bit floating point shuffles. /// /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2 @@ -9505,24 +9573,9 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Blend; // Check if the blend happens to exactly fit that of SHUFPD. - if ((Mask[0] == -1 || Mask[0] < 2) && - (Mask[1] == -1 || (Mask[1] >= 4 && Mask[1] < 6)) && - (Mask[2] == -1 || (Mask[2] >= 2 && Mask[2] < 4)) && - (Mask[3] == -1 || Mask[3] >= 6)) { - unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 5) << 1) | - ((Mask[2] == 3) << 2) | ((Mask[3] == 7) << 3); - return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V1, V2, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); - } - if ((Mask[0] == -1 || (Mask[0] >= 4 && Mask[0] < 6)) && - (Mask[1] == -1 || Mask[1] < 2) && - (Mask[2] == -1 || Mask[2] >= 6) && - (Mask[3] == -1 || (Mask[3] >= 2 && Mask[3] < 4))) { - unsigned SHUFPDMask = (Mask[0] == 5) | ((Mask[1] == 1) << 1) | - ((Mask[2] == 7) << 2) | ((Mask[3] == 3) << 3); - return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V2, V1, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); - } + if (SDValue Op = + lowerVectorShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG)) + return Op; // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. However, if we have AVX2 and either inputs are already in place, @@ -9584,10 +9637,10 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, PSHUFDMask[2 * i] = 2 * RepeatedMask[i]; PSHUFDMask[2 * i + 1] = 2 * RepeatedMask[i] + 1; } - return DAG.getNode( - ISD::BITCAST, DL, MVT::v4i64, + return DAG.getBitcast( + MVT::v4i64, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, - DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, V1), + DAG.getBitcast(MVT::v8i32, V1), getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); } } @@ -9700,11 +9753,11 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask)); if (Subtarget->hasAVX2()) - return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, - DAG.getNode(ISD::BITCAST, DL, MVT::v8f32, - DAG.getNode(ISD::BUILD_VECTOR, DL, + return DAG.getNode( + X86ISD::VPERMV, DL, MVT::v8f32, + DAG.getBitcast(MVT::v8f32, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask)), - V1); + V1); // Otherwise, fall back. 
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, @@ -9894,12 +9947,11 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, PSHUFBMask[2 * i] = DAG.getConstant(2 * M, DL, MVT::i8); PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, DL, MVT::i8); } - return DAG.getNode( - ISD::BITCAST, DL, MVT::v16i16, - DAG.getNode( - X86ISD::PSHUFB, DL, MVT::v32i8, - DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V1), - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask))); + return DAG.getBitcast(MVT::v16i16, + DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, + DAG.getBitcast(MVT::v32i8, V1), + DAG.getNode(ISD::BUILD_VECTOR, DL, + MVT::v32i8, PSHUFBMask))); } // Try to simplify this by merging 128-bit lanes to enable a lane-based @@ -10039,10 +10091,9 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits), VT.getVectorNumElements()); - V1 = DAG.getNode(ISD::BITCAST, DL, FpVT, V1); - V2 = DAG.getNode(ISD::BITCAST, DL, FpVT, V2); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask)); + V1 = DAG.getBitcast(FpVT, V1); + V2 = DAG.getBitcast(FpVT, V2); + return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask)); } switch (VT.SimpleTy) { @@ -10064,64 +10115,60 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, } } -/// \brief Handle lowering of 8-lane 64-bit floating point shuffles. -static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - SDLoc DL(Op); - assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); - assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); - ArrayRef<int> Mask = SVOp->getMask(); - assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - - // X86 has dedicated unpack instructions that can handle specific blend - // operations: UNPCKH and UNPCKL. - if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2); +static SDValue lowerVectorShuffleWithVALIGN(SDLoc DL, MVT VT, + ArrayRef<int> Mask, SDValue V1, + SDValue V2, SelectionDAG &DAG) { - // FIXME: Implement direct support for this type! - return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG); + assert(VT.getScalarSizeInBits() >= 32 && "Unexpected data type for VALIGN"); + // VALIGN pattern 2, 3, 4, 5, .. (sequential, shifted right) + int AlignVal = -1; + for (int i = 0; i < (signed)VT.getVectorNumElements(); ++i) { + if (Mask[i] < 0) + continue; + if (Mask[i] < i) + return SDValue(); + if (AlignVal == -1) + AlignVal = Mask[i] - i; + else if (Mask[i] - i != AlignVal) + return SDValue(); + } + // Vector source operands should be swapped + return DAG.getNode(X86ISD::VALIGN, DL, VT, V2, V1, + DAG.getConstant(AlignVal, DL, MVT::i8)); } -/// \brief Handle lowering of 16-lane 32-bit floating point shuffles. 
-static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - SDLoc DL(Op); - assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); - assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); - ArrayRef<int> Mask = SVOp->getMask(); - assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); +static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT, + ArrayRef<int> Mask, SDValue V1, + SDValue V2, SelectionDAG &DAG) { - // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane. - 0, 16, 1, 17, 4, 20, 5, 21, - // Second 128-bit lane. - 8, 24, 9, 25, 12, 28, 13, 29})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane. - 2, 18, 3, 19, 6, 22, 7, 23, - // Second 128-bit lane. - 10, 26, 11, 27, 14, 30, 15, 31})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2); + assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV"); - // FIXME: Implement direct support for this type! - return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG); + MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); + MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); + + SmallVector<SDValue, 32> VPermMask; + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) + VPermMask.push_back(Mask[i] < 0 ? DAG.getUNDEF(MaskEltVT) : + DAG.getConstant(Mask[i], DL,MaskEltVT)); + SDValue MaskNode = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecVT, + VPermMask); + if (isSingleInputShuffleMask(Mask)) + return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1); + + return DAG.getNode(X86ISD::VPERMV3, DL, VT, MaskNode, V1, V2); } -/// \brief Handle lowering of 8-lane 64-bit integer shuffles. -static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, + +/// \brief Handle lowering of 8-lane 64-bit floating point shuffles. +static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc DL(Op); - assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); - assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); + MVT VT = Op.getSimpleValueType(); + assert((V1.getSimpleValueType() == MVT::v8f64 || + V1.getSimpleValueType() == MVT::v8i64) && "Bad operand type!"); + assert((V2.getSimpleValueType() == MVT::v8f64 || + V2.getSimpleValueType() == MVT::v8i64) && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); @@ -10129,21 +10176,40 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // X86 has dedicated unpack instructions that can handle specific blend // operations: UNPCKH and UNPCKL. if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2); + return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2); + return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); - // FIXME: Implement direct support for this type! 
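// [Illustrative aside, not part of the imported diff] For two-source masks,
// lowerVectorShuffleWithPERMV above falls back to VPERMV3, whose semantics
// (simplified: masking of out-of-range indices is ignored here) are "each
// mask element indexes the concatenation V1:V2". A standalone model:
#include <cassert>
#include <vector>

static std::vector<int> permv3(const std::vector<int> &Mask,
                               const std::vector<int> &V1,
                               const std::vector<int> &V2) {
  const int N = static_cast<int>(V1.size());
  std::vector<int> R;
  for (int M : Mask)
    R.push_back(M < N ? V1[M] : V2[M - N]); // index selects across V1:V2
  return R;
}

int main() {
  auto R = permv3({0, 5, 2, 7}, {10, 11, 12, 13}, {20, 21, 22, 23});
  assert(R == std::vector<int>({10, 21, 12, 23}));
}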
- return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG); + if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG)) + return Op; + + if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, VT, Mask, V1, V2, DAG)) + return Op; + + // PERMILPD instruction - mask 0/1, 0/1, 2/3, 2/3, 4/5, 4/5, 6/7, 6/7 + if (isSingleInputShuffleMask(Mask)) { + if (!is128BitLaneCrossingShuffleMask(VT, Mask)) + return DAG.getNode(X86ISD::VPERMILPI, DL, VT, V1, + get1bitLaneShuffleImm8ForMask(Mask, DL, DAG)); + + SmallVector<int, 4> RepeatedMask; + if (is256BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) + return DAG.getNode(X86ISD::VPERMI, DL, VT, V1, + getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); + } + return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG); } /// \brief Handle lowering of 16-lane 32-bit integer shuffles. -static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, +static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); - assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); - assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); + assert((V1.getSimpleValueType() == MVT::v16i32 || + V1.getSimpleValueType() == MVT::v16f32) && "Bad operand type!"); + assert((V2.getSimpleValueType() == MVT::v16i32 || + V2.getSimpleValueType() == MVT::v16f32) && "Bad operand type!"); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); @@ -10154,16 +10220,39 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, 0, 16, 1, 17, 4, 20, 5, 21, // Second 128-bit lane. 8, 24, 9, 25, 12, 28, 13, 29})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2); + return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, {// First 128-bit lane. 2, 18, 3, 19, 6, 22, 7, 23, // Second 128-bit lane. 10, 26, 11, 27, 14, 30, 15, 31})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2); + return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); - // FIXME: Implement direct support for this type! - return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG); + if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, + 12, 12, 14, 14})) + return DAG.getNode(X86ISD::MOVSLDUP, DL, VT, V1); + if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, + 13, 13, 15, 15})) + return DAG.getNode(X86ISD::MOVSHDUP, DL, VT, V1); + + SmallVector<int, 4> RepeatedMask; + if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) { + if (isSingleInputShuffleMask(Mask)) { + unsigned Opc = VT.isInteger() ? X86ISD::PSHUFD : X86ISD::VPERMILPI; + return DAG.getNode(Opc, DL, VT, V1, + getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); + } + + for (int i = 0; i < 4; ++i) + if (RepeatedMask[i] >= 16) + RepeatedMask[i] -= 12; + return lowerVectorShuffleWithSHUFPS(DL, VT, RepeatedMask, V1, V2, DAG); + } + + if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG)) + return Op; + + return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG); } /// \brief Handle lowering of 32-lane 16-bit integer shuffles. @@ -10223,13 +10312,11 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, // the requisite ISA extensions for that element type are available. 
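Both AVX-512 paths above first test the mask against UNPCKL/UNPCKH patterns, which interleave the low or high halves of each 128-bit lane of the two sources. A small generator (a sketch, not an LLVM API) reproduces the magic constants used in these isShuffleEquivalent checks:

#include <vector>

// For EltBits-wide elements there are 128/EltBits per 128-bit lane; UNPCKL
// pairs the low half of every lane of V1 (indices 0..NumElts-1) with the
// same lane of V2 (indices NumElts..2*NumElts-1), UNPCKH the high half.
std::vector<int> unpackMask(int NumElts, int EltBits, bool Lo) {
  int LaneElts = 128 / EltBits;
  std::vector<int> Mask;
  for (int Lane = 0; Lane < NumElts; Lane += LaneElts)
    for (int i = 0; i < LaneElts / 2; ++i) {
      int Base = Lane + i + (Lo ? 0 : LaneElts / 2);
      Mask.push_back(Base);           // element taken from V1
      Mask.push_back(Base + NumElts); // element taken from V2
    }
  return Mask;
}
// unpackMask(8, 64, true)   -> {0, 8, 2, 10, 4, 12, 6, 14}        (v8i64 UNPCKL)
// unpackMask(16, 32, false) -> {2, 18, 3, 19, 6, 22, ..., 15, 31} (v16i32 UNPCKH)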
switch (VT.SimpleTy) { case MVT::v8f64: - return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG); - case MVT::v16f32: - return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v8i64: - return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG); + return lowerV8X64VectorShuffle(Op, V1, V2, Subtarget, DAG); + case MVT::v16f32: case MVT::v16i32: - return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG); + return lowerV16X32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v32i16: if (Subtarget->hasBWI()) return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG); @@ -10311,10 +10398,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, // Make sure that the new vector type is legal. For example, v2f64 isn't // legal on SSE1. if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) { - V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1); - V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2); - return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask)); + V1 = DAG.getBitcast(NewVT, V1); + V2 = DAG.getBitcast(NewVT, V2); + return DAG.getBitcast( + VT, DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask)); } } @@ -10509,12 +10596,11 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); // If Idx is 0, it's cheaper to do a move instead of a pextrw. if (Idx == 0) - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BITCAST, dl, - MVT::v4i32, - Op.getOperand(0)), - Op.getOperand(1))); + return DAG.getNode( + ISD::TRUNCATE, dl, MVT::i16, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + DAG.getBitcast(MVT::v4i32, Op.getOperand(0)), + Op.getOperand(1))); SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Op.getOperand(0), Op.getOperand(1)); SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract, @@ -10538,10 +10624,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { User->getValueType(0) != MVT::i32)) return SDValue(); SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, - Op.getOperand(0)), - Op.getOperand(1)); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract); + DAG.getBitcast(MVT::v4i32, Op.getOperand(0)), + Op.getOperand(1)); + return DAG.getBitcast(MVT::f32, Extract); } if (VT == MVT::i32 || VT == MVT::i64) { @@ -10655,8 +10740,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, if (Idx == 0) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BITCAST, dl, - MVT::v4i32, Vec), + DAG.getBitcast(MVT::v4i32, Vec), Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. MVT EltVT = MVT::i32; @@ -10877,8 +10961,8 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); assert(OpVT.is128BitVector() && "Expected an SSE type!"); - return DAG.getNode(ISD::BITCAST, dl, OpVT, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt)); + return DAG.getBitcast( + OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt)); } // Lower a node with an EXTRACT_SUBVECTOR opcode. 
This may result in @@ -11670,14 +11754,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, MachinePointerInfo::getConstantPool(), false, false, false, 16); - SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, XR1), - CLod0); + SDValue Unpck1 = + getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, MachinePointerInfo::getConstantPool(), false, false, false, 16); - SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck1); + SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); SDValue Result; @@ -11685,12 +11768,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, // FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'. Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub); } else { - SDValue S2F = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Sub); + SDValue S2F = DAG.getBitcast(MVT::v4i32, Sub); SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32, S2F, 0x4E, DAG); Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, - DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Shuffle), - Sub); + DAG.getBitcast(MVT::v2f64, Shuffle), Sub); } return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result, @@ -11713,20 +11795,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG); Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, - DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load), + DAG.getBitcast(MVT::v2f64, Load), DAG.getIntPtrConstant(0, dl)); // Or the load with the bias. - SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, - DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, - MVT::v2f64, Load)), - DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, - MVT::v2f64, Bias))); - Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, - DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or), - DAG.getIntPtrConstant(0, dl)); + SDValue Or = DAG.getNode( + ISD::OR, dl, MVT::v2i64, + DAG.getBitcast(MVT::v2i64, + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Load)), + DAG.getBitcast(MVT::v2i64, + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias))); + Or = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, + DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl)); // Subtract the bias. SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); @@ -11805,19 +11886,16 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, if (Subtarget.hasSSE41()) { EVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16; // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa); - SDValue VecCstLowBitcast = - DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstLow); - SDValue VecBitcast = DAG.getNode(ISD::BITCAST, DL, VecI16VT, V); + SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow); + SDValue VecBitcast = DAG.getBitcast(VecI16VT, V); // Low will be bitcasted right away, so do not bother bitcasting back to its // original type. 
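The OR-with-bias/subtract sequence in LowerUINT_TO_FP_i32 above is the classic 2^52 trick: OR the 32-bit integer into the mantissa of the double 2^52, reinterpret, and subtract the bias so only the integer value remains. A scalar model, assuming the constant-pool bias is the usual 0x4330000000000000:

#include <cstdint>
#include <cstring>

// The OR puts V into the low mantissa bits, so the word reads back as the
// exact double 2^52 + V; subtracting 2^52 leaves V exactly, since every
// uint32_t is representable in a double.
double u32ToF64(uint32_t V) {
  uint64_t Bits = 0x4330000000000000ULL | V; // exponent of 2^52, mantissa = V
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D - 4503599627370496.0;             // 2^52
}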
Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32)); // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16), // (uint4) 0x53000000, 0xaa); - SDValue VecCstHighBitcast = - DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstHigh); - SDValue VecShiftBitcast = - DAG.getNode(ISD::BITCAST, DL, VecI16VT, HighShift); + SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh); + SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift); // High will be bitcasted right away, so do not bother bitcasting back to // its original type. High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast, @@ -11843,11 +11921,11 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, makeArrayRef(&CstFAddArray[0], NumElts)); // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); - SDValue HighBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, High); + SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High); SDValue FHigh = DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd); // return (float4) lo + fhi; - SDValue LowBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, Low); + SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low); return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); } @@ -12103,8 +12181,8 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, MVT HVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements()/2); - OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi); + OpLo = DAG.getBitcast(HVT, OpLo); + OpHi = DAG.getBitcast(HVT, OpHi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } @@ -12189,14 +12267,14 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { if (InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI()) return Op; // legal, will go to VPMOVB2M, VPMOVW2M - if ((InVT.is256BitVector() || InVT.is128BitVector()) + if ((InVT.is256BitVector() || InVT.is128BitVector()) && InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI() && Subtarget->hasVLX()) return Op; // legal, will go to VPMOVB2M, VPMOVW2M if (InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI()) return Op; // legal, will go to VPMOVD2M, VPMOVQ2M - if ((InVT.is256BitVector() || InVT.is128BitVector()) + if ((InVT.is256BitVector() || InVT.is128BitVector()) && InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI() && Subtarget->hasVLX()) return Op; // legal, will go to VPMOVB2M, VPMOVQ2M @@ -12224,7 +12302,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { // On AVX2, v4i64 -> v4i32 becomes VPERMD. 
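The pair of blend constants above applies the same mantissa trick per 16-bit half for single precision: 0x4b000000 plants the low half next to an exponent of 2^23, 0x53000000 plants the high half next to 2^39, and one FP add recombines them with correct rounding. A scalar model of the sequence:

#include <cstdint>
#include <cstring>

static float asF32(uint32_t Bits) {
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

float u32ToF32(uint32_t V) {
  float Lo = asF32(0x4B000000u | (V & 0xFFFF)); // == 2^23 + low16, exactly
  float Hi = asF32(0x53000000u | (V >> 16));    // == 2^39 + high16 * 2^16
  float FHi = Hi - (0x1.0p39f + 0x1.0p23f);     // == high16 * 2^16 - 2^23
  return Lo + FHi;                              // one rounding: (float)V
}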
if (Subtarget->hasInt256()) { static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; - In = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, In); + In = DAG.getBitcast(MVT::v8i32, In); In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32), ShufMask); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In, @@ -12235,8 +12313,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { DAG.getIntPtrConstant(0, DL)); SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(2, DL)); - OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi); + OpLo = DAG.getBitcast(MVT::v4i32, OpLo); + OpHi = DAG.getBitcast(MVT::v4i32, OpHi); static const int ShufMask[] = {0, 2, 4, 6}; return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask); } @@ -12244,7 +12322,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) { // On AVX2, v8i32 -> v8i16 becomed PSHUFB. if (Subtarget->hasInt256()) { - In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In); + In = DAG.getBitcast(MVT::v32i8, In); SmallVector<SDValue,32> pshufbMask; for (unsigned i = 0; i < 2; ++i) { @@ -12261,14 +12339,14 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { } SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, pshufbMask); In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV); - In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In); + In = DAG.getBitcast(MVT::v4i64, In); static const int ShufMask[] = {0, 2, -1, -1}; In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64), &ShufMask[0]); In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(0, DL)); - return DAG.getNode(ISD::BITCAST, DL, VT, In); + return DAG.getBitcast(VT, In); } SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, @@ -12277,8 +12355,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, DAG.getIntPtrConstant(4, DL)); - OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi); + OpLo = DAG.getBitcast(MVT::v16i8, OpLo); + OpHi = DAG.getBitcast(MVT::v16i8, OpHi); // The PSHUFB mask: static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, @@ -12288,13 +12366,13 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1); OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1); - OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi); + OpLo = DAG.getBitcast(MVT::v4i32, OpLo); + OpHi = DAG.getBitcast(MVT::v4i32, OpHi); // The MOVLHPS Mask: static const int ShufMask2[] = {0, 1, 4, 5}; SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2); - return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, res); + return DAG.getBitcast(MVT::v8i16, res); } // Handle truncation of V256 to V128 using shuffles. 
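Every PSHUFB-based truncation above follows the same recipe: within each 128-bit lane, collect the low bytes of every wide element into the front of the lane, then glue the compacted halves together with a dword shuffle. A sketch of the i32-to-i16 byte-selection control (using PSHUFB's 0x80 zeroing convention, where the ISel masks above use undef, -1, for the don't-care tail):

#include <array>
#include <cstdint>

// Produces {0,1, 4,5, 8,9, 12,13, 0x80 x 8}: the two low bytes of each of
// the four i32 elements in a 128-bit lane, followed by zeroed bytes.
std::array<uint8_t, 16> truncI32ToI16Control() {
  std::array<uint8_t, 16> M{};
  for (int i = 0; i < 4; ++i) {
    M[2 * i] = uint8_t(4 * i);         // low byte of element i
    M[2 * i + 1] = uint8_t(4 * i + 1); // second byte of element i
  }
  for (int i = 8; i < 16; ++i)
    M[i] = 0x80;                       // PSHUFB writes zero for 0x80
  return M;
}

The MOVLHPS-style {0, 1, 4, 5} dword shuffle at the end then packs the two compacted halves into the final v8i16.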
@@ -12310,8 +12388,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { // Prepare truncation shuffle mask for (unsigned i = 0; i != NumElems; ++i) MaskVec[i] = i * 2; - SDValue V = DAG.getVectorShuffle(NVT, DL, - DAG.getNode(ISD::BITCAST, DL, NVT, In), + SDValue V = DAG.getVectorShuffle(NVT, DL, DAG.getBitcast(NVT, In), DAG.getUNDEF(NVT), &MaskVec[0]); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, DAG.getIntPtrConstant(0, DL)); @@ -12420,13 +12497,12 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { // For a vector, cast operands to a vector type, perform the logic op, // and cast the result back to the original value type. MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64); - SDValue MaskCasted = DAG.getNode(ISD::BITCAST, dl, VecVT, Mask); - SDValue Operand = IsFNABS ? - DAG.getNode(ISD::BITCAST, dl, VecVT, Op0.getOperand(0)) : - DAG.getNode(ISD::BITCAST, dl, VecVT, Op0); + SDValue MaskCasted = DAG.getBitcast(VecVT, Mask); + SDValue Operand = IsFNABS ? DAG.getBitcast(VecVT, Op0.getOperand(0)) + : DAG.getBitcast(VecVT, Op0); unsigned BitOp = IsFABS ? ISD::AND : IsFNABS ? ISD::OR : ISD::XOR; - return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted)); + return DAG.getBitcast(VT, + DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted)); } // If not vector, then scalar. @@ -12591,7 +12667,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget, // Cast all vectors into TestVT for PTEST. for (unsigned i = 0, e = VecIns.size(); i < e; ++i) - VecIns[i] = DAG.getNode(ISD::BITCAST, DL, TestVT, VecIns[i]); + VecIns[i] = DAG.getBitcast(TestVT, VecIns[i]); // If more than one full vectors are evaluated, OR them first before PTEST. for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) { @@ -12925,29 +13001,31 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op, DAGCombinerInfo &DCI, unsigned &RefinementSteps, bool &UseOneConstNR) const { - // FIXME: We should use instruction latency models to calculate the cost of - // each potential sequence, but this is very hard to do reliably because - // at least Intel's Core* chips have variable timing based on the number of - // significant digits in the divisor and/or sqrt operand. - if (!Subtarget->useSqrtEst()) - return SDValue(); - EVT VT = Op.getValueType(); + const char *RecipOp; - // SSE1 has rsqrtss and rsqrtps. + // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps. // TODO: Add support for AVX512 (v16f32). // It is likely not profitable to do this for f64 because a double-precision // rsqrt estimate with refinement on x86 prior to FMA requires at least 16 // instructions: convert to single, rsqrtss, convert back to double, refine // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA // along with FMA, this could be a throughput win. 
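Both estimate hooks now consult the TargetRecip table in TargetOptions instead of a single subtarget flag, and report the refinement step count consumed by the generic Newton-Raphson expansion. One step per the usual recurrences, as a scalar sketch built on the real SSE intrinsics:

#include <xmmintrin.h>

// rsqrt: x1 = x0 * (1.5 - 0.5 * a * x0^2); rcp: x1 = x0 * (2 - a * x0).
// Each step roughly doubles the valid bits of the ~2^-12 hardware estimate.
float rsqrtRefined(float A) {
  float X0 = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(A)));
  return X0 * (1.5f - 0.5f * A * X0 * X0);
}

float rcpRefined(float A) {
  float X0 = _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ss(A)));
  return X0 * (2.0f - A * X0);
}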
- if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || - (Subtarget->hasAVX() && VT == MVT::v8f32)) { - RefinementSteps = 1; - UseOneConstNR = false; - return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); - } - return SDValue(); + if (VT == MVT::f32 && Subtarget->hasSSE1()) + RecipOp = "sqrtf"; + else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) || + (VT == MVT::v8f32 && Subtarget->hasAVX())) + RecipOp = "vec-sqrtf"; + else + return SDValue(); + + TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); + UseOneConstNR = false; + return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); } /// The minimum architected relative accuracy is 2^-12. We need one @@ -12955,15 +13033,9 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op, SDValue X86TargetLowering::getRecipEstimate(SDValue Op, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const { - // FIXME: We should use instruction latency models to calculate the cost of - // each potential sequence, but this is very hard to do reliably because - // at least Intel's Core* chips have variable timing based on the number of - // significant digits in the divisor. - if (!Subtarget->useReciprocalEst()) - return SDValue(); - EVT VT = Op.getValueType(); - + const char *RecipOp; + // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps. // TODO: Add support for AVX512 (v16f32). // It is likely not profitable to do this for f64 because a double-precision @@ -12971,12 +13043,20 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, // 15 instructions: convert to single, rcpss, convert back to double, refine // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA // along with FMA, this could be a throughput win. - if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || - (Subtarget->hasAVX() && VT == MVT::v8f32)) { - RefinementSteps = ReciprocalEstimateRefinementSteps; - return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op); - } - return SDValue(); + if (VT == MVT::f32 && Subtarget->hasSSE1()) + RecipOp = "divf"; + else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) || + (VT == MVT::v8f32 && Subtarget->hasAVX())) + RecipOp = "vec-divf"; + else + return SDValue(); + + TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); + return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op); } /// If we have at least two divisions that use the same divisor, convert to @@ -13407,8 +13487,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, assert(Subtarget->hasSSE2() && "Don't know how to lower!"); // First cast everything to the right type. - Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); - Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); + Op0 = DAG.getBitcast(MVT::v4i32, Op0); + Op1 = DAG.getBitcast(MVT::v4i32, Op1); // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. 
The lower @@ -13442,7 +13522,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (Invert) Result = DAG.getNOT(dl, Result, MVT::v4i32); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); + return DAG.getBitcast(VT, Result); } if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) { @@ -13451,8 +13531,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); // First cast everything to the right type. - Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); - Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); + Op0 = DAG.getBitcast(MVT::v4i32, Op0); + Op1 = DAG.getBitcast(MVT::v4i32, Op1); // Do the compare. SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1); @@ -13465,7 +13545,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (Invert) Result = DAG.getNOT(dl, Result, MVT::v4i32); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); + return DAG.getBitcast(VT, Result); } } @@ -13662,7 +13742,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp); EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; - VCmp = DAG.getNode(ISD::BITCAST, DL, VCmpVT, VCmp); + VCmp = DAG.getBitcast(VCmpVT, VCmp); SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2); @@ -13687,12 +13767,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0)) Op2Scalar = Op2.getOperand(0); if (Op1Scalar.getNode() && Op2Scalar.getNode()) { - SDValue newSelect = DAG.getNode(ISD::SELECT, DL, + SDValue newSelect = DAG.getNode(ISD::SELECT, DL, Op1Scalar.getValueType(), Cond, Op1Scalar, Op2Scalar); if (newSelect.getValueSizeInBits() == VT.getSizeInBits()) - return DAG.getNode(ISD::BITCAST, DL, VT, newSelect); - SDValue ExtVec = DAG.getNode(ISD::BITCAST, DL, MVT::v8i1, newSelect); + return DAG.getBitcast(VT, newSelect); + SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec, DAG.getIntPtrConstant(0, DL)); } @@ -13975,7 +14055,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr); MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2); CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2); - Curr = DAG.getNode(ISD::BITCAST, dl, CurrVT, Curr); + Curr = DAG.getBitcast(CurrVT, Curr); } SDValue SignExt = Curr; @@ -13993,7 +14073,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr, DAG.getConstant(31, dl, MVT::i8)); SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5}); - return DAG.getNode(ISD::BITCAST, dl, VT, Ext); + return DAG.getBitcast(VT, Ext); } return SDValue(); @@ -14202,7 +14282,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, // Bitcast the loaded value to a vector of the original element type, in // the size of the target vector type. 
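When only SSE2 is available, the PCMPEQ path above compares v2i64 as v4i32 and then requires both dword results of each quadword to be all-ones before forming the final mask (the full sequence, partly elided by the diff context, pairs each dword result with its neighbor and ANDs them). A scalar model:

#include <cstdint>

uint64_t pcmpeqqViaPcmpeqd(uint64_t A, uint64_t B) {
  uint32_t LoEq = (uint32_t)A == (uint32_t)B ? 0xFFFFFFFFu : 0u;
  uint32_t HiEq = (uint32_t)(A >> 32) == (uint32_t)(B >> 32) ? 0xFFFFFFFFu : 0u;
  uint32_t Both = LoEq & HiEq;  // neighbor shuffle + AND, per quadword
  return Both ? ~0ULL : 0ULL;   // all-ones iff all 64 bits matched
}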
- SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res); + SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res); unsigned SizeRatio = RegSz / MemSz; if (Ext == ISD::SEXTLOAD) { @@ -14227,7 +14307,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, SDValue Shuff = DAG.getVectorShuffle( WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); - Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff); + Shuff = DAG.getBitcast(RegVT, Shuff); // Build the arithmetic shift. unsigned Amt = RegVT.getVectorElementType().getSizeInBits() - @@ -14249,7 +14329,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); // Bitcast to the requested type. - Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff); + Shuff = DAG.getBitcast(RegVT, Shuff); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF); return Shuff; } @@ -14933,7 +15013,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, MVT EltVT = VT.getVectorElementType(); EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); - ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt); + ShAmt = DAG.getBitcast(ShVT, ShAmt); return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } @@ -14959,8 +15039,8 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); + DAG.getBitcast(BitcastVT, Mask), + DAG.getIntPtrConstant(0, dl)); switch (Op.getOpcode()) { default: break; @@ -15017,12 +15097,31 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Op.getOperand(2), Op.getOperand(3)); case INTR_TYPE_1OP_MASK_RM: { SDValue Src = Op.getOperand(1); - SDValue Src0 = Op.getOperand(2); + SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); - SDValue RoundingMode = Op.getOperand(4); + SDValue RoundingMode; + if (Op.getNumOperands() == 4) + RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); + else + RoundingMode = Op.getOperand(4); + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; + if (IntrWithRoundingModeOpcode != 0) { + unsigned Round = cast<ConstantSDNode>(RoundingMode)->getZExtValue(); + if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) + return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, + dl, Op.getValueType(), Src, RoundingMode), + Mask, PassThru, Subtarget, DAG); + } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src, RoundingMode), - Mask, Src0, Subtarget, DAG); + Mask, PassThru, Subtarget, DAG); + } + case INTR_TYPE_1OP_MASK: { + SDValue Src = Op.getOperand(1); + SDValue Passthru = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src), + Mask, Passthru, Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK_RM: { SDValue Src1 = Op.getOperand(1); @@ -15069,6 +15168,30 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Src1,Src2), Mask, PassThru, Subtarget, DAG); } + case INTR_TYPE_3OP_MASK: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue PassThru = Op.getOperand(4); + SDValue Mask = Op.getOperand(5); + // We specify 2 possible opcodes for intrinsics with rounding modes. 
+ // First, we check if the intrinsic may have non-default rounding mode, + // (IntrData->Opc1 != 0), then we check the rounding mode operand. + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; + if (IntrWithRoundingModeOpcode != 0) { + SDValue Rnd = Op.getOperand(6); + unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue(); + if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { + return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, + dl, Op.getValueType(), + Src1, Src2, Src3, Rnd), + Mask, PassThru, Subtarget, DAG); + } + } + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, Src3), + Mask, PassThru, Subtarget, DAG); + } case FMA_OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); @@ -15140,7 +15263,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, DAG.getUNDEF(BitcastVT), CmpMask, DAG.getIntPtrConstant(0, dl)); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); + return DAG.getBitcast(Op.getValueType(), Res); } case COMI: { // Comparison intrinsics ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; @@ -15176,7 +15299,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask.getValueType().getSizeInBits()); SDLoc dl(Op); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), + DAG.getBitcast(BitcastVT, Mask), DAG.getIntPtrConstant(0, dl)); return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress, @@ -15191,7 +15314,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask.getValueType().getSizeInBits()); SDLoc dl(Op); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), + DAG.getBitcast(BitcastVT, Mask), DAG.getIntPtrConstant(0, dl)); return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1), Op.getOperand(2)); @@ -15211,16 +15334,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(1)); - case Intrinsic::x86_avx512_mask_valign_q_512: - case Intrinsic::x86_avx512_mask_valign_d_512: - // Vector source operands are swapped. - return getVectorMaskingNode(DAG.getNode(X86ISD::VALIGN, dl, - Op.getValueType(), Op.getOperand(2), - Op.getOperand(1), - Op.getOperand(3)), - Op.getOperand(5), Op.getOperand(4), - Subtarget, DAG); - // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest // or testp pattern and a setcc for the result. @@ -15289,8 +15402,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget case Intrinsic::x86_avx512_kortestz_w: case Intrinsic::x86_avx512_kortestc_w: { unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? 
X86::COND_E: X86::COND_B; - SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1)); - SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2)); + SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1)); + SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2)); SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8); SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test); @@ -15378,7 +15491,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget // Compute the symbol for the LSDA. We know it'll get emitted later. MachineFunction &MF = DAG.getMachineFunction(); SDValue Op1 = Op.getOperand(1); - Op1->dump(); auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal()); MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol( GlobalValue::getRealLinkageName(Fn->getName())); @@ -15409,7 +15521,7 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else - MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); + MaskInReg = DAG.getBitcast(MaskVT, Mask); SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); @@ -15437,7 +15549,7 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else - MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); + MaskInReg = DAG.getBitcast(MaskVT, Mask); SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); @@ -15460,7 +15572,7 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else - MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); + MaskInReg = DAG.getBitcast(MaskVT, Mask); //SDVTList VTs = DAG.getVTList(MVT::Other); SDValue Ops[] = {MaskInReg, Base, Scale, Index, Disp, Segment, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops); @@ -15693,23 +15805,25 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); + EVT VT = DataToCompress.getValueType(); if (isAllOnes(Mask)) // return just a store return DAG.getStore(Chain, dl, DataToCompress, Addr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, + VT.getScalarSizeInBits()/8); - EVT VT = DataToCompress.getValueType(); EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VT.getVectorNumElements()); EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, Mask.getValueType().getSizeInBits()); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), + DAG.getBitcast(BitcastVT, Mask), DAG.getIntPtrConstant(0, dl)); SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress, DAG.getUNDEF(VT)); return DAG.getStore(Chain, dl, Compressed, Addr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, + VT.getScalarSizeInBits()/8); } case EXPAND_FROM_MEM: { SDLoc dl(Op); @@ -15721,17 +15835,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const 
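COMPRESS_TO_MEM with a non-trivial mask packs the enabled lanes contiguously; the lowering above does this as an in-register compress followed by an ordinary store, whose alignment this patch fixes to the element size instead of 0. The memory effect, as a scalar sketch:

#include <cstddef>

// Active lanes land back-to-back starting at Mem; inactive lanes are skipped.
// With an all-ones mask this is just a full-width store (the early return
// above). Returns how many elements were written. (The in-register form in
// the lowering additionally stores undef padding after the packed elements,
// since it writes the full vector width.)
template <typename T, std::size_t N>
std::size_t compressStoreModel(const T (&V)[N], const bool (&Mask)[N], T *Mem) {
  std::size_t J = 0;
  for (std::size_t I = 0; I != N; ++I)
    if (Mask[I])
      Mem[J++] = V[I];
  return J;
}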
X86Subtarget *Subtarget, if (isAllOnes(Mask)) // return just a load return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false, - false, 0); + false, VT.getScalarSizeInBits()/8); EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VT.getVectorNumElements()); EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, Mask.getValueType().getSizeInBits()); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), + DAG.getBitcast(BitcastVT, Mask), DAG.getIntPtrConstant(0, dl)); SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), - false, false, false, 0); + false, false, false, + VT.getScalarSizeInBits()/8); SDValue Results[] = { DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand, PathThru), @@ -16274,8 +16389,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, -1, 4, -1, 5, -1, 6, -1, 7}; ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask); - ALo = DAG.getNode(ISD::BITCAST, dl, ExVT, ALo); - BLo = DAG.getNode(ISD::BITCAST, dl, ExVT, BLo); + ALo = DAG.getBitcast(ExVT, ALo); + BLo = DAG.getBitcast(ExVT, BLo); ALo = DAG.getNode(ISD::SRA, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT)); BLo = DAG.getNode(ISD::SRA, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT)); } @@ -16294,8 +16409,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, -1, 12, -1, 13, -1, 14, -1, 15}; AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask); - AHi = DAG.getNode(ISD::BITCAST, dl, ExVT, AHi); - BHi = DAG.getNode(ISD::BITCAST, dl, ExVT, BHi); + AHi = DAG.getBitcast(ExVT, AHi); + BHi = DAG.getBitcast(ExVT, BHi); AHi = DAG.getNode(ISD::SRA, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT)); BHi = DAG.getNode(ISD::SRA, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT)); } @@ -16323,8 +16438,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, // Now multiply odd parts. SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds); - Evens = DAG.getNode(ISD::BITCAST, dl, VT, Evens); - Odds = DAG.getNode(ISD::BITCAST, dl, VT, Odds); + Evens = DAG.getBitcast(VT, Evens); + Odds = DAG.getBitcast(VT, Odds); // Merge the two vectors back together with a shuffle. This expands into 2 // shuffles. @@ -16352,10 +16467,10 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, // Bit cast to 32-bit vectors for MULUDQ EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : (VT == MVT::v4i64) ? 
MVT::v8i32 : MVT::v16i32; - A = DAG.getNode(ISD::BITCAST, dl, MulVT, A); - B = DAG.getNode(ISD::BITCAST, dl, MulVT, B); - Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi); - Bhi = DAG.getNode(ISD::BITCAST, dl, MulVT, Bhi); + A = DAG.getBitcast(MulVT, A); + B = DAG.getBitcast(MulVT, B); + Ahi = DAG.getBitcast(MulVT, Ahi); + Bhi = DAG.getBitcast(MulVT, Bhi); SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B); SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi); @@ -16417,7 +16532,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); - return DAG.getNode(ISD::BITCAST, dl, VT, CallInfo.first); + return DAG.getBitcast(VT, CallInfo.first); } static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, @@ -16455,12 +16570,10 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ; // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h> // => <2 x i64> <ae|cg> - SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(Opcode, dl, MulVT, Op0, Op1)); + SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Op0, Op1)); // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef> // => <2 x i64> <bf|dh> - SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1)); + SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1)); // Shuffle it back into the right order. SDValue Highs, Lows; @@ -16499,16 +16612,16 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, // Return true if the requred (according to Opcode) shift-imm form is natively // supported by the Subtarget -static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, +static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { if (VT.getScalarSizeInBits() < 16) return false; - + if (VT.is512BitVector() && (VT.getScalarSizeInBits() > 16 || Subtarget->hasBWI())) return true; - bool LShift = VT.is128BitVector() || + bool LShift = VT.is128BitVector() || (VT.is256BitVector() && Subtarget->hasInt256()); bool AShift = LShift && (Subtarget->hasVLX() || @@ -16518,15 +16631,15 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, // The shift amount is a variable, but it is the same for all vector lanes. // These instrcutions are defined together with shift-immediate. -static -bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget, +static +bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { return SupportedVectorShiftWithImm(VT, Subtarget, Opcode); } // Return true if the requred (according to Opcode) variable-shift form is // natively supported by the Subtarget -static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget, +static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { if (!Subtarget->hasInt256() || VT.getScalarSizeInBits() < 16) @@ -16574,7 +16687,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, // Make a large shift. SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R, ShiftAmt, DAG); - SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL); + SHL = DAG.getBitcast(VT, SHL); // Zero out the rightmost bits. 
SmallVector<SDValue, 32> V( NumElts, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, MVT::i8)); @@ -16585,7 +16698,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, // Make a large shift. SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R, ShiftAmt, DAG); - SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL); + SRL = DAG.getBitcast(VT, SRL); // Zero out the leftmost bits. SmallVector<SDValue, 32> V( NumElts, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, MVT::i8)); @@ -16801,7 +16914,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, dl, VT)); - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op); + Op = DAG.getBitcast(MVT::v4f32, Op); Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); return DAG.getNode(ISD::MUL, dl, VT, Op, R); } @@ -16871,11 +16984,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2); if (TargetOpcode == X86ISD::MOVSD) CastVT = MVT::v2i64; - SDValue BitCast1 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift1); - SDValue BitCast2 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift2); + SDValue BitCast1 = DAG.getBitcast(CastVT, Shift1); + SDValue BitCast2 = DAG.getBitcast(CastVT, Shift2); SDValue Result = getTargetShuffleNode(TargetOpcode, dl, CastVT, BitCast2, BitCast1, DAG); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); + return DAG.getBitcast(VT, Result); } } @@ -16931,10 +17044,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z); SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, R, R); SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, R, R); - ALo = DAG.getNode(ISD::BITCAST, dl, ExtVT, ALo); - AHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, AHi); - RLo = DAG.getNode(ISD::BITCAST, dl, ExtVT, RLo); - RHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, RHi); + ALo = DAG.getBitcast(ExtVT, ALo); + AHi = DAG.getBitcast(ExtVT, AHi); + RLo = DAG.getBitcast(ExtVT, RLo); + RHi = DAG.getBitcast(ExtVT, RHi); SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo); SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi); Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT)); @@ -17293,7 +17406,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2); SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Elts); - SDValue ToV2F64 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, BV); + SDValue ToV2F64 = DAG.getBitcast(MVT::v2f64, BV); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64, DAG.getIntPtrConstant(0, dl)); } @@ -17315,141 +17428,241 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, return SDValue(); } -static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - SDNode *Node = Op.getNode(); - SDLoc dl(Node); +/// Compute the horizontal sum of bytes in V for the elements of VT. +/// +/// Requires V to be a byte vector and VT to be an integer vector type with +/// wider elements than V's type. The width of the elements of VT determines +/// how many bytes of V are summed horizontally to produce each element of the +/// result. 
+static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDLoc DL(V); + MVT ByteVecVT = V.getSimpleValueType(); + MVT EltVT = VT.getVectorElementType(); + int NumElts = VT.getVectorNumElements(); + assert(ByteVecVT.getVectorElementType() == MVT::i8 && + "Expected value to have byte element type."); + assert(EltVT != MVT::i8 && + "Horizontal byte sum only makes sense for wider elements!"); + unsigned VecSize = VT.getSizeInBits(); + assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!"); + + // PSADBW instruction horizontally adds all bytes and leaves the result in i64 + // chunks, thus directly computes the pop count for v2i64 and v4i64. + if (EltVT == MVT::i64) { + SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); + V = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, V, Zeros); + return DAG.getBitcast(VT, V); + } + + if (EltVT == MVT::i32) { + // We unpack the low half and high half into i32s interleaved with zeros so + // that we can use PSADBW to horizontally sum them. The most useful part of + // this is that it lines up the results of two PSADBW instructions to be + // two v2i64 vectors which concatenated are the 4 population counts. We can + // then use PACKUSWB to shrink and concatenate them into a v4i32 again. + SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL); + SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V, Zeros); + SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V, Zeros); + + // Do the horizontal sums into two v2i64s. + Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); + Low = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, + DAG.getBitcast(ByteVecVT, Low), Zeros); + High = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, + DAG.getBitcast(ByteVecVT, High), Zeros); + + // Merge them together. + MVT ShortVecVT = MVT::getVectorVT(MVT::i16, VecSize / 16); + V = DAG.getNode(X86ISD::PACKUS, DL, ByteVecVT, + DAG.getBitcast(ShortVecVT, Low), + DAG.getBitcast(ShortVecVT, High)); + + return DAG.getBitcast(VT, V); + } + + // The only element type left is i16. + assert(EltVT == MVT::i16 && "Unknown how to handle type"); + + // To obtain pop count for each i16 element starting from the pop count for + // i8 elements, shift the i16s left by 8, sum as i8s, and then shift as i16s + // right by 8. It is important to shift as i16s as i8 vector shift isn't + // directly supported. + SmallVector<SDValue, 16> Shifters(NumElts, DAG.getConstant(8, DL, EltVT)); + SDValue Shifter = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Shifters); + SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), Shifter); + V = DAG.getNode(ISD::ADD, DL, ByteVecVT, DAG.getBitcast(ByteVecVT, Shl), + DAG.getBitcast(ByteVecVT, V)); + return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), Shifter); +} + +static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, SDLoc DL, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + MVT EltVT = VT.getVectorElementType(); + unsigned VecSize = VT.getSizeInBits(); - Op = Op.getOperand(0); - EVT VT = Op.getValueType(); - assert((VT.is128BitVector() || VT.is256BitVector()) && - "CTPOP lowering only implemented for 128/256-bit wide vector types"); + // Implement a lookup table in register by using an algorithm based on: + // http://wm.ite.pl/articles/sse-popcount.html + // + // The general idea is that every lower byte nibble in the input vector is an + // index into an in-register pre-computed pop count table.
We then split up the + // input vector into two new ones: (1) a vector with only the shifted-right + // higher nibbles for each byte and (2) a vector with the lower nibbles (and + // masked out higher ones) for each byte. PSHUFB is used separately with both + // to index the in-register table. Next, both are added and the result is an + // i8 vector where each element contains the pop count for the input byte. + // + // To obtain the pop count for elements != i8, we follow up with the same + // approach and use additional tricks as described below. + // + const int LUT[16] = {/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2, + /* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3, + /* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3, + /* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4}; + + int NumByteElts = VecSize / 8; + MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts); + SDValue In = DAG.getBitcast(ByteVecVT, Op); + SmallVector<SDValue, 16> LUTVec; + for (int i = 0; i < NumByteElts; ++i) + LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8)); + SDValue InRegLUT = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, LUTVec); + SmallVector<SDValue, 16> Mask0F(NumByteElts, + DAG.getConstant(0x0F, DL, MVT::i8)); + SDValue M0F = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, Mask0F); + + // High nibbles + SmallVector<SDValue, 16> Four(NumByteElts, DAG.getConstant(4, DL, MVT::i8)); + SDValue FourV = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, Four); + SDValue HighNibbles = DAG.getNode(ISD::SRL, DL, ByteVecVT, In, FourV); + + // Low nibbles + SDValue LowNibbles = DAG.getNode(ISD::AND, DL, ByteVecVT, In, M0F); + + // The input vector is used as the shuffle mask that indexes elements into the + // LUT. After counting low and high nibbles, add the vector to obtain the + // final pop count per i8 element. + SDValue HighPopCnt = + DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, HighNibbles); + SDValue LowPopCnt = + DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, LowNibbles); + SDValue PopCnt = DAG.getNode(ISD::ADD, DL, ByteVecVT, HighPopCnt, LowPopCnt); - unsigned NumElts = VT.getVectorNumElements(); - EVT EltVT = VT.getVectorElementType(); - unsigned Len = EltVT.getSizeInBits(); + if (EltVT == MVT::i8) + return PopCnt; + + return LowerHorizontalByteSum(PopCnt, VT, Subtarget, DAG); +} + +static SDValue LowerVectorCTPOPBitmath(SDValue Op, SDLoc DL, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + assert(VT.is128BitVector() && + "Only 128-bit vector bitmath lowering supported."); + + int VecSize = VT.getSizeInBits(); + MVT EltVT = VT.getVectorElementType(); + int Len = EltVT.getSizeInBits(); // This is the vectorized version of the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel // with a minor tweak to use a series of adds + shifts instead of vector - multiplications. Implemented for the v2i64, v4i64, v4i32, v8i32 types: - // - // v2i64, v4i64, v4i32 => Only profitable w/ popcnt disabled - // v8i32 => Always profitable - // - // FIXME: There a couple of possible improvements: - // - // 1) Support for i8 and i16 vectors (needs measurements if popcnt enabled). - // 2) Use strategies from http://wm.ite.pl/articles/sse-popcount.html - // - assert(EltVT.isInteger() && (Len == 32 || Len == 64) && Len % 8 == 0 && - "CTPOP not implemented for this vector element type."); + // multiplications. Implemented for all integer vector types.
We only use + // this when we don't have SSSE3 which allows a LUT-based lowering that is + // much faster, even faster than using native popcnt instructions. + + auto GetShift = [&](unsigned OpCode, SDValue V, int Shifter) { + MVT VT = V.getSimpleValueType(); + SmallVector<SDValue, 32> Shifters( + VT.getVectorNumElements(), + DAG.getConstant(Shifter, DL, VT.getVectorElementType())); + return DAG.getNode(OpCode, DL, VT, V, + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Shifters)); + }; + auto GetMask = [&](SDValue V, APInt Mask) { + MVT VT = V.getSimpleValueType(); + SmallVector<SDValue, 32> Masks( + VT.getVectorNumElements(), + DAG.getConstant(Mask, DL, VT.getVectorElementType())); + return DAG.getNode(ISD::AND, DL, VT, V, + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Masks)); + }; - // X86 canonicalize ANDs to vXi64, generate the appropriate bitcasts to avoid - // extra legalization. - bool NeedsBitcast = EltVT == MVT::i32; - MVT BitcastVT = VT.is256BitVector() ? MVT::v4i64 : MVT::v2i64; + // We don't want to incur the implicit masks required to SRL vNi8 vectors on + // x86, so set the SRL type to have elements at least i16 wide. This is + // correct because all of our SRLs are followed immediately by a mask anyways + // that handles any bits that sneak into the high bits of the byte elements. + MVT SrlVT = Len > 8 ? VT : MVT::getVectorVT(MVT::i16, VecSize / 16); - SDValue Cst55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, - EltVT); - SDValue Cst33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, - EltVT); - SDValue Cst0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, - EltVT); + SDValue V = Op; // v = v - ((v >> 1) & 0x55555555...) - SmallVector<SDValue, 8> Ones(NumElts, DAG.getConstant(1, dl, EltVT)); - SDValue OnesV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ones); - SDValue Srl = DAG.getNode(ISD::SRL, dl, VT, Op, OnesV); - if (NeedsBitcast) - Srl = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Srl); - - SmallVector<SDValue, 8> Mask55(NumElts, Cst55); - SDValue M55 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask55); - if (NeedsBitcast) - M55 = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M55); - - SDValue And = DAG.getNode(ISD::AND, dl, Srl.getValueType(), Srl, M55); - if (VT != And.getValueType()) - And = DAG.getNode(ISD::BITCAST, dl, VT, And); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op, And); + SDValue Srl = + DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 1)); + SDValue And = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x55))); + V = DAG.getNode(ISD::SUB, DL, VT, V, And); // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) - SmallVector<SDValue, 8> Mask33(NumElts, Cst33); - SDValue M33 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask33); - SmallVector<SDValue, 8> Twos(NumElts, DAG.getConstant(2, dl, EltVT)); - SDValue TwosV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Twos); + SDValue AndLHS = GetMask(V, APInt::getSplat(Len, APInt(8, 0x33))); + Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 2)); + SDValue AndRHS = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x33))); + V = DAG.getNode(ISD::ADD, DL, VT, AndLHS, AndRHS); - Srl = DAG.getNode(ISD::SRL, dl, VT, Sub, TwosV); - if (NeedsBitcast) { - Srl = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Srl); - M33 = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M33); - Sub = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Sub); - } + // v = (v + (v >> 4)) & 0x0F0F0F0F... 
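// (For reference, the scalar bithack this vectorizes, per 32-bit word; an
// illustrative aside rather than part of the lowering itself:
//   v = v - ((v >> 1) & 0x55555555);                // 2-bit partial sums
//   v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // 4-bit partial sums
//   v = (v + (v >> 4)) & 0x0F0F0F0F;                // per-byte pop counts
// GetShift/GetMask above splat exactly these shift amounts and byte masks
// across the vector, and LowerHorizontalByteSum then widens the byte counts
// when the element type is larger than i8.)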
+ Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 4)); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, V, Srl); + V = GetMask(Add, APInt::getSplat(Len, APInt(8, 0x0F))); - SDValue AndRHS = DAG.getNode(ISD::AND, dl, M33.getValueType(), Srl, M33); - SDValue AndLHS = DAG.getNode(ISD::AND, dl, M33.getValueType(), Sub, M33); - if (VT != AndRHS.getValueType()) { - AndRHS = DAG.getNode(ISD::BITCAST, dl, VT, AndRHS); - AndLHS = DAG.getNode(ISD::BITCAST, dl, VT, AndLHS); - } - SDValue Add = DAG.getNode(ISD::ADD, dl, VT, AndLHS, AndRHS); + // At this point, V contains the byte-wise population count, and we are + // merely doing a horizontal sum if necessary to get the wider element + // counts. + if (EltVT == MVT::i8) + return V; - // v = (v + (v >> 4)) & 0x0F0F0F0F... - SmallVector<SDValue, 8> Fours(NumElts, DAG.getConstant(4, dl, EltVT)); - SDValue FoursV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Fours); - Srl = DAG.getNode(ISD::SRL, dl, VT, Add, FoursV); - Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl); - - SmallVector<SDValue, 8> Mask0F(NumElts, Cst0F); - SDValue M0F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask0F); - if (NeedsBitcast) { - Add = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Add); - M0F = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M0F); - } - And = DAG.getNode(ISD::AND, dl, M0F.getValueType(), Add, M0F); - if (VT != And.getValueType()) - And = DAG.getNode(ISD::BITCAST, dl, VT, And); - - // The algorithm mentioned above uses: - // v = (v * 0x01010101...) >> (Len - 8) - // - // Change it to use vector adds + vector shifts which yield faster results on - // Haswell than using vector integer multiplication. - // - // For i32 elements: - // v = v + (v >> 8) - // v = v + (v >> 16) - // - // For i64 elements: - // v = v + (v >> 8) - // v = v + (v >> 16) - // v = v + (v >> 32) - // - Add = And; - SmallVector<SDValue, 8> Csts; - for (unsigned i = 8; i <= Len/2; i *= 2) { - Csts.assign(NumElts, DAG.getConstant(i, dl, EltVT)); - SDValue CstsV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Csts); - Srl = DAG.getNode(ISD::SRL, dl, VT, Add, CstsV); - Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl); - Csts.clear(); + return LowerHorizontalByteSum( + DAG.getBitcast(MVT::getVectorVT(MVT::i8, VecSize / 8), V), VT, Subtarget, + DAG); +} + +static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + // FIXME: Need to add AVX-512 support here! + assert((VT.is256BitVector() || VT.is128BitVector()) && + "Unknown CTPOP type to handle"); + SDLoc DL(Op.getNode()); + SDValue Op0 = Op.getOperand(0); + + if (!Subtarget->hasSSSE3()) { + // We can't use the fast LUT approach, so fall back on vectorized bitmath. + assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!"); + return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG); } - // The result is on the least significant 6-bits on i32 and 7-bits on i64. - SDValue Cst3F = DAG.getConstant(APInt(Len, Len == 32 ? 0x3F : 0x7F), dl, - EltVT); - SmallVector<SDValue, 8> Cst3FV(NumElts, Cst3F); - SDValue M3F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Cst3FV); - if (NeedsBitcast) { - Add = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Add); - M3F = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M3F); + if (VT.is256BitVector() && !Subtarget->hasInt256()) { + unsigned NumElems = VT.getVectorNumElements(); + + // Extract each 128-bit vector, compute pop count and concat the result. 
+ SDValue LHS = Extract128BitVector(Op0, 0, DAG, DL); + SDValue RHS = Extract128BitVector(Op0, NumElems/2, DAG, DL); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, + LowerVectorCTPOPInRegLUT(LHS, DL, Subtarget, DAG), + LowerVectorCTPOPInRegLUT(RHS, DL, Subtarget, DAG)); } - And = DAG.getNode(ISD::AND, dl, M3F.getValueType(), Add, M3F); - if (VT != And.getValueType()) - And = DAG.getNode(ISD::BITCAST, dl, VT, And); - return And; + return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG); +} + +static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + assert(Op.getValueType().isVector() && + "We only do custom lowering for vector population count."); + return LowerVectorCTPOP(Op, Subtarget, DAG); } static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { @@ -17840,8 +18053,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, MVT::f64); SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn, - DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, VBias)); - Or = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or); + DAG.getBitcast(MVT::v2i64, VBias)); + Or = DAG.getBitcast(MVT::v2f64, Or); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias); Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub)); return; @@ -17964,7 +18177,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2); SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0)); - SDValue ToVecInt = DAG.getNode(ISD::BITCAST, dl, WiderVT, Expanded); + SDValue ToVecInt = DAG.getBitcast(WiderVT, Expanded); if (ExperimentalVectorWideningLegalization) { // If we are legalizing vectors by widening, we already have the desired @@ -17994,7 +18207,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FANDN: return "X86ISD::FANDN"; case X86ISD::FOR: return "X86ISD::FOR"; case X86ISD::FXOR: return "X86ISD::FXOR"; - case X86ISD::FSRL: return "X86ISD::FSRL"; case X86ISD::FILD: return "X86ISD::FILD"; case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; @@ -18121,6 +18333,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW"; case X86ISD::SHUFP: return "X86ISD::SHUFP"; + case X86ISD::SHUF128: return "X86ISD::SHUF128"; case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS"; case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD"; case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS"; @@ -18143,8 +18356,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VPERMV3: return "X86ISD::VPERMV3"; case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3"; case X86ISD::VPERMI: return "X86ISD::VPERMI"; + case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM"; + case X86ISD::VRANGE: return "X86ISD::VRANGE"; case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; case X86ISD::PMULDQ: return "X86ISD::PMULDQ"; + case X86ISD::PSADBW: return "X86ISD::PSADBW"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; @@ -18184,6 +18400,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND"; case X86ISD::FMUL_RND: return 
"X86ISD::FMUL_RND"; case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; + case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; + case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; case X86ISD::ADDS: return "X86ISD::ADDS"; case X86ISD::SUBS: return "X86ISD::SUBS"; } @@ -18193,7 +18411,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // X86 supports extremely general addressing modes. CodeModel::Model M = getTargetMachine().getCodeModel(); Reloc::Model R = getTargetMachine().getRelocationModel(); @@ -20028,7 +20247,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, SDValue(ResNode.getNode(), 1)); } - return DAG.getNode(ISD::BITCAST, dl, VT, ResNode); + return DAG.getBitcast(VT, ResNode); } } @@ -20087,7 +20306,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, // Just remove no-op shuffle masks. if (Mask.size() == 1) { - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Input), + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input), /*AddTo*/ true); return true; } @@ -20123,14 +20342,14 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, } if (Depth == 1 && Root->getOpcode() == Shuffle) return false; // Nothing to do! - Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input); + Op = DAG.getBitcast(ShuffleVT, Input); DCI.AddToWorklist(Op.getNode()); if (Shuffle == X86ISD::MOVDDUP) Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op); else Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op); DCI.AddToWorklist(Op.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } @@ -20141,11 +20360,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, MVT ShuffleVT = MVT::v4f32; if (Depth == 1 && Root->getOpcode() == Shuffle) return false; // Nothing to do! - Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input); + Op = DAG.getBitcast(ShuffleVT, Input); DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op); DCI.AddToWorklist(Op.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } @@ -20155,11 +20374,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, MVT ShuffleVT = MVT::v4f32; if (Depth == 1 && Root->getOpcode() == Shuffle) return false; // Nothing to do! 
- Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input); + Op = DAG.getBitcast(ShuffleVT, Input); DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op); DCI.AddToWorklist(Op.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } @@ -20189,11 +20408,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, default: llvm_unreachable("Impossible mask size!"); }; - Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input); + Op = DAG.getBitcast(ShuffleVT, Input); DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op); DCI.AddToWorklist(Op.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } @@ -20222,14 +20441,14 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8)); } MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes); - Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Input); + Op = DAG.getBitcast(ByteVT, Input); DCI.AddToWorklist(Op.getNode()); SDValue PSHUFBMaskOp = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVT, PSHUFBMask); DCI.AddToWorklist(PSHUFBMaskOp.getNode()); Op = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Op, PSHUFBMaskOp); DCI.AddToWorklist(Op.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Op), /*AddTo*/ true); return true; } @@ -20401,7 +20620,7 @@ static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) { #ifndef NDEBUG for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i) for (int j = 0; j < LaneElts; ++j) - assert(Mask[j] == Mask[i * LaneElts + j] - LaneElts && + assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) && "Mask doesn't repeat in high 128-bit lanes!"); #endif Mask.resize(LaneElts); @@ -20532,7 +20751,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask, SDValue W = Chain.pop_back_val(); if (V.getValueType() != W.getOperand(0).getValueType()) - V = DAG.getNode(ISD::BITCAST, DL, W.getOperand(0).getValueType(), V); + V = DAG.getBitcast(W.getOperand(0).getValueType(), V); switch (W.getOpcode()) { default: @@ -20551,7 +20770,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask, } } if (V.getValueType() != N.getValueType()) - V = DAG.getNode(ISD::BITCAST, DL, N.getValueType(), V); + V = DAG.getBitcast(N.getValueType(), V); // Return the new chain to replace N. return V; @@ -20668,12 +20887,12 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, DMask[DOffset + 0] = DOffset + 1; DMask[DOffset + 1] = DOffset + 0; MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); - V = DAG.getNode(ISD::BITCAST, DL, DVT, V); + V = DAG.getBitcast(DVT, V); DCI.AddToWorklist(V.getNode()); V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V, getV4X86ShuffleImm8ForMask(DMask, DL, DAG)); DCI.AddToWorklist(V.getNode()); - return DAG.getNode(ISD::BITCAST, DL, VT, V); + return DAG.getBitcast(VT, V); } // Look for shuffle patterns which can be implemented as a single unpack. 
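The PSHUFB fallback just above encodes an arbitrary in-lane byte shuffle as a constant byte vector (PSHUFBMask). Per 16-byte lane the instruction reduces to the following scalar model (SSSE3 semantics: a set high bit in the mask byte zeroes the result byte, otherwise the low four bits index the source lane):

    #include <cstdint>

    // Scalar model of PSHUFB for one 128-bit lane.
    void pshufb_lane(uint8_t Dst[16], const uint8_t Src[16],
                     const uint8_t Mask[16]) {
      for (int i = 0; i < 16; ++i)
        Dst[i] = (Mask[i] & 0x80) ? 0 : Src[Mask[i] & 0x0F];
    }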
@@ -20704,7 +20923,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) || makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) { // We can replace all three shuffles with an unpack. - V = DAG.getNode(ISD::BITCAST, DL, VT, D.getOperand(0)); + V = DAG.getBitcast(VT, D.getOperand(0)); DCI.AddToWorklist(V.getNode()); return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL : X86ISD::UNPCKH, @@ -20848,8 +21067,8 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, CanFold = SVOp->getMaskElt(i) < 0; if (CanFold) { - SDValue BC00 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(0)); - SDValue BC01 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(1)); + SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0)); + SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1)); SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01); return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, &SVOp->getMask()[0]); } @@ -20981,7 +21200,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, Shuffle = DAG.getVectorShuffle(CurrentVT, dl, InVec.getOperand(0), Shuffle, &ShuffleMask[0]); - Shuffle = DAG.getNode(ISD::BITCAST, dl, OriginalVT, Shuffle); + Shuffle = DAG.getBitcast(OriginalVT, Shuffle); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle, EltNo); } @@ -21101,7 +21320,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, SDValue Vals[4]; if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) { - SDValue Cst = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, InputVector); + SDValue Cst = DAG.getBitcast(MVT::v2i64, InputVector); EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(); SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst, DAG.getConstant(0, dl, VecIdxTy)); @@ -21717,13 +21936,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (TValIsAllOnes && FValIsAllZeros) Ret = Cond; else if (TValIsAllOnes) - Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, - DAG.getNode(ISD::BITCAST, DL, CondVT, RHS)); + Ret = + DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS)); else if (FValIsAllZeros) Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, - DAG.getNode(ISD::BITCAST, DL, CondVT, LHS)); + DAG.getBitcast(CondVT, LHS)); - return DAG.getNode(ISD::BITCAST, DL, VT, Ret); + return DAG.getBitcast(VT, Ret); } } @@ -22554,15 +22773,13 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, // and work with those going forward. 
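The PerformSELECTCombine hunk above leans on the condition being a sign-extended vector mask, so every lane is all-ones or all-zeros; a select whose arms are constant all-ones or all-zeros then collapses to plain bitwise ops. The per-lane identities, as a small self-checking sketch (lane width illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t L = 0x12345678u, R = 0x9ABCDEF0u;
      for (uint32_t M : {0x00000000u, 0xFFFFFFFFu}) { // legal mask lanes
        assert((M | R) == (M ? 0xFFFFFFFFu : R)); // true arm all-ones  -> OR
        assert((M & L) == (M ? L : 0u));          // false arm all-zeros -> AND
      }
      return 0;
    }

When both properties hold at once, the mask itself is the answer, which is the early Ret = Cond case in that hunk.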
SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, OnesOrZeroesF); - SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, - Vector64); + SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64); OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Vector32, DAG.getIntPtrConstant(0, DL)); IntVT = MVT::i32; } - SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, - OnesOrZeroesF); + SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF); SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI, DAG.getConstant(1, DL, IntVT)); SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, @@ -22775,7 +22992,7 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG, SDValue NewShuffle = DAG.getVectorShuffle(Shuffle->getValueType(0), DL, Shuffle->getOperand(0), DAG.getConstant(0, DL, SrcType), Mask); - return DAG.getNode(ISD::BITCAST, DL, N0.getValueType(), NewShuffle); + return DAG.getBitcast(N0.getValueType(), NewShuffle); } static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, @@ -22916,7 +23133,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, assert((EltBits == 8 || EltBits == 16 || EltBits == 32) && "Unsupported VT for PSIGN"); Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0)); - return DAG.getNode(ISD::BITCAST, DL, VT, Mask); + return DAG.getBitcast(VT, Mask); } // PBLENDVB only available on SSE 4.1 if (!Subtarget->hasSSE41()) @@ -22924,11 +23141,11 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8; - X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X); - Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y); - Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask); + X = DAG.getBitcast(BlendVT, X); + Y = DAG.getBitcast(BlendVT, Y); + Mask = DAG.getBitcast(BlendVT, Mask); Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X); - return DAG.getNode(ISD::BITCAST, DL, VT, Mask); + return DAG.getBitcast(VT, Mask); } } @@ -23129,7 +23346,7 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); // Convert Src0 value - SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mld->getSrc0()); + SDValue WideSrc0 = DAG.getBitcast(WideVecVT, Mld->getSrc0()); if (Mld->getSrc0().getOpcode() != ISD::UNDEF) { SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) @@ -23146,7 +23363,7 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, SDValue Mask = Mld->getMask(); if (Mask.getValueType() == VT) { // Mask and original value have the same type - NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask); + NewMask = DAG.getBitcast(WideVecVT, Mask); SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; @@ -23214,7 +23431,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue()); + SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue()); SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; @@ -23231,7 +23448,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue Mask = Mst->getMask(); if (Mask.getValueType() == VT) { // Mask and original value have the same type - NewMask = 
DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask); + NewMask = DAG.getBitcast(WideVecVT, Mask); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) @@ -23323,7 +23540,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue()); + SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue()); SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; @@ -23354,7 +23571,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), StoreType, VT.getSizeInBits()/StoreType.getSizeInBits()); assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); - SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff); + SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff); SmallVector<SDValue, 8> Chains; SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, dl, TLI.getPointerTy()); @@ -23495,7 +23712,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue ExtOp0 = OldExtract.getOperand(0); unsigned VecSize = ExtOp0.getValueSizeInBits(); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64); - SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtOp0); + SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, BitCast, OldExtract.getOperand(1)); return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(), @@ -24239,10 +24456,10 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, // DAG. SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0)); // The AND node needs bitcasts to/from an integer vector type around it. - SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst); + SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst); SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, N->getOperand(0)->getOperand(0), MaskConst); - SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd); + SDValue Res = DAG.getBitcast(VT, NewAnd); return Res; } @@ -24442,8 +24659,7 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG, // In this case, the inner vzext is completely dead because we're going to // only look at bits inside of the low element. Just do the outer vzext on // a bitcast of the input to the inner. 
- return DAG.getNode(X86ISD::VZEXT, DL, VT, - DAG.getNode(ISD::BITCAST, DL, OpVT, V)); + return DAG.getNode(X86ISD::VZEXT, DL, VT, DAG.getBitcast(OpVT, V)); } // Check if we can bypass extracting and re-inserting an element of an input @@ -24465,7 +24681,7 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG, OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV, DAG.getIntPtrConstant(0, DL)); } - Op = DAG.getNode(ISD::BITCAST, DL, OpVT, OrigV); + Op = DAG.getBitcast(OpVT, OrigV); return DAG.getNode(X86ISD::VZEXT, DL, VT, Op); } } @@ -25301,6 +25517,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, Res.first = DestReg; Res.second = &X86::GR64RegClass; } + } else if (VT != MVT::Other) { + // Type mismatch and not a clobber: Return an error; + Res.first = 0; + Res.second = nullptr; } } else if (Res.second == &X86::FR32RegClass || Res.second == &X86::FR64RegClass || @@ -25326,13 +25546,23 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, Res.second = &X86::VR256RegClass; else if (X86::VR512RegClass.hasType(VT)) Res.second = &X86::VR512RegClass; + else if (VT != MVT::Other) { + // Type mismatch and not a clobber: Return an error; + Res.first = 0; + Res.second = nullptr; + } + } else if (VT != MVT::Other) { + // Type mismatch and not a clobber: Return an error; + Res.first = 0; + Res.second = nullptr; } return Res; } int X86TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { // Scaling factors are not free at all. // An indexed folded instruction, i.e., inst (reg1, reg2, scale), // will take 2 allocations in the out of order engine instead of 1 @@ -25351,7 +25581,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM, // E.g., on Haswell: // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3. // vmovaps %ymm1, (%r8) can use port 2, 3, or 7. - if (isLegalAddressingMode(AM, Ty)) + if (isLegalAddressingMode(AM, Ty, AS)) // Scale represents reg2 * scale, thus account for 1 // as soon as we use a second register. return AM.Scale != 0; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index b589ca42e56c..b5d062f72b24 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -56,10 +56,6 @@ namespace llvm { /// corresponds to X86::ANDNPS or X86::ANDNPD. FANDN, - /// Bitwise logical right shift of floating point values. This - /// corresponds to X86::PSRLDQ. - FSRL, - /// These operations represent an abstract X86 call /// instruction, which includes a bunch of information. In particular the /// operands of these node are: @@ -184,6 +180,9 @@ namespace llvm { /// Shuffle 16 8-bit values within a vector. PSHUFB, + /// Compute Sum of Absolute Differences. + PSADBW, + /// Bitwise Logical AND NOT of Packed FP values. ANDNP, @@ -200,6 +199,7 @@ namespace llvm { /// Combined add and sub on an FP vector. ADDSUB, + // FP vector ops with rounding mode. FADD_RND, FSUB_RND, @@ -207,7 +207,11 @@ namespace llvm { FDIV_RND, FMAX_RND, FMIN_RND, - + FSQRT_RND, + + // FP vector get exponent + FGETEXP_RND, + // Integer add/sub with unsigned saturation. 
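The three new "(VT != MVT::Other)" branches in the getRegForInlineAsmConstraint hunk above turn a constraint/type mismatch into an explicit error pair (first = 0, second = nullptr) rather than silently handing back an unsuitable register class. A hedged caller-side sketch (TLI, TRI, Constraint, and VT stand for the usual surrounding context; the diagnostic itself is illustrative):

    // Callers can now distinguish failure by testing the class pointer.
    std::pair<unsigned, const TargetRegisterClass *> R =
        TLI.getRegForInlineAsmConstraint(TRI, Constraint, VT);
    if (!R.second)
      report_fatal_error("inline asm constraint does not fit operand type");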
ADDUS, SUBUS, @@ -355,6 +359,8 @@ namespace llvm { PSHUFHW, PSHUFLW, SHUFP, + //Shuffle Packed Values at 128-bit granularity + SHUF128, MOVDDUP, MOVSHDUP, MOVSLDUP, @@ -374,6 +380,10 @@ namespace llvm { VPERMIV3, VPERMI, VPERM2X128, + //Fix Up Special Packed Float32/64 values + VFIXUPIMM, + //Range Restriction Calculation For Packed Pairs of Float32/64 values + VRANGE, // Broadcast scalar to vector VBROADCAST, // Broadcast subvector to vector @@ -729,7 +739,8 @@ namespace llvm { /// Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; /// Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can @@ -748,7 +759,8 @@ namespace llvm { /// of the specified type. /// If the AM is supported, the return value must be >= 0. /// If the AM is not supported, it returns a negative value. - int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override; + int getScalingFactorCost(const AddrMode &AM, Type *Ty, + unsigned AS) const override; bool isVectorShiftByScalarCheap(Type *Ty) const override; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 9d11d3c7050f..c1d0aef07118 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1047,12 +1047,6 @@ multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _, EVEX_4V; } } - -defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", X86VPermi, v8i64_info>, - EVEX_V512, VEX_W; -defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", X86VPermi, v8f64_info>, - EVEX_V512, VEX_W; - defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>, EVEX_V512; defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>, @@ -1063,37 +1057,6 @@ def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), (VPERMILPDZri VR512:$src1, imm:$imm)>; -// -- VPERM - register form -- -multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC, - PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> { - - def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, - (OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V; - - def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, - (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>, - EVEX_4V; -} - -defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, loadv16i32, i512mem, - v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, loadv8i64, i512mem, - v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -let ExeDomain = SSEPackedSingle in -defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, loadv16f32, f512mem, - v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -let ExeDomain = SSEPackedDouble in -defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, loadv8f64, f512mem, - v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - // -- VPERM2I - 3 source operands form -- multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC, PatFrag mem_frag, X86MemOperand x86memop, @@ -3401,32 +3364,6 @@ defm VPUNPCKHQDQZ : 
avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, VR512, loadv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// -// AVX-512 - PSHUFD -// - -multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC, - SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT> { - def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, u8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, - (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>, - EVEX; - def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst), - (ins x86memop:$src1, u8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, - (OpVT (OpNode (mem_frag addr:$src1), - (i8 imm:$src2))))]>, EVEX; -} - -defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, loadv16i32, - i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; - -//===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// @@ -3729,14 +3666,14 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, (ins _.RC:$src1, u8imm:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))), - SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V; + SSE_INTSHIFT_ITINS_P.rr>; let mayLoad = 1 in defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i8 imm:$src2))), - SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V; + SSE_INTSHIFT_ITINS_P.rm>; } multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, @@ -3746,7 +3683,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2", (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))), - SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V, EVEX_B; + SSE_INTSHIFT_ITINS_P.rm>, EVEX_B; } multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -3836,16 +3773,16 @@ multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, } defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>, - avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>; + avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>, - avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>; + avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>, AVX512BIi8Base, EVEX_4V; defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>, - avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>; + avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V; -defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>; -defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>; +defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>, AVX512BIi8Base, EVEX_4V; +defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>; defm VPSRA : 
avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>; @@ -3865,7 +3802,8 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", - (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))), + (_.VT (OpNode _.RC:$src1, + (_.VT (bitconvert (_.LdFrag addr:$src2))))), SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; } @@ -3927,6 +3865,65 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>, defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>; defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>; +//===-------------------------------------------------------------------===// +// 1-src variable permutation VPERMW/D/Q +//===-------------------------------------------------------------------===// +multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo _> { + let Predicates = [HasAVX512] in + defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>, + avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512; + + let Predicates = [HasAVX512, HasVLX] in + defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>, + avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256; +} + +multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo> { + let Predicates = [HasAVX512] in + defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, + VTInfo.info512>, + avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, + VTInfo.info512>, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in + defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, + VTInfo.info256>, + avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, + VTInfo.info256>, EVEX_V256; +} + + +defm VPERM : avx512_var_shift_w<0x8D, "vpermw", X86VPermv>; + +defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, + avx512vl_i32_info>; +defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, + avx512vl_i64_info>, VEX_W; +defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, + avx512vl_f32_info>; +defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, + avx512vl_f64_info>, VEX_W; + +defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", + X86VPermi, avx512vl_i64_info>, + EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; +defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", + X86VPermi, avx512vl_f64_info>, + EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; + +//===----------------------------------------------------------------------===// +// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW +//===----------------------------------------------------------------------===// + +defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", + X86PShufd, avx512vl_i32_info>, + EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; +defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", + X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W; +defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", + X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// @@ -4869,11 +4866,6 @@ multiclass avx512_fp28_p<bits<8> opc, string 
OpcodeStr, X86VectorVTInfo _, (ins _.RC:$src), OpcodeStr, "$src", "$src", (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>; - defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src), OpcodeStr, - "{sae}, $src", "$src, {sae}", - (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B; - defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src), OpcodeStr, "$src", "$src", (OpNode (_.FloatVT @@ -4881,24 +4873,58 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, (i32 FROUND_CURRENT))>; defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _.MemOp:$src), OpcodeStr, "$src", "$src", + (ins _.MemOp:$src), OpcodeStr, + "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (OpNode (_.FloatVT (X86VBroadcast (_.ScalarLdFrag addr:$src))), (i32 FROUND_CURRENT))>, EVEX_B; } +multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, + SDNode OpNode> { + defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src), OpcodeStr, + "{sae}, $src", "$src, {sae}", + (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B; +} multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> { defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>, - EVEX_CD8<32, CD8VF>; + avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode>, + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>, - VEX_W, EVEX_CD8<32, CD8VF>; + avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode>, + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } +multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, + SDNode OpNode> { + // Define only if AVX512VL feature is present. + let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode>, + EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode>, + EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode>, + EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode>, + EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + } +} let Predicates = [HasERI], hasSideEffects = 0 in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD; + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX; +} +defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>, + avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX; + +multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, + SDNode OpNodeRnd, X86VectorVTInfo _>{ + defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc", + (_.VT (OpNodeRnd _.RC:$src, (i32 imm:$rc)))>, + EVEX, EVEX_B, EVEX_RC; } multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, @@ -5007,20 +5033,22 @@ multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr, } } -defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>; +multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr, + SDNode OpNodeRnd> { + defm PSZ : avx512_sqrt_packed_round<opc, 
!strconcat(OpcodeStr, "ps"), OpNodeRnd, + v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd, + v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; +} + +defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>, + avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>; defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, SSE_SQRTSS, SSE_SQRTSD>; let Predicates = [HasAVX512] in { - def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)), - (VSQRTPSZr VR512:$src1)>; - def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)), - (VSQRTPDZr VR512:$src1)>; - def : Pat<(f32 (fsqrt FR32X:$src)), (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -5583,30 +5611,6 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1, (loadv8i64 addr:$src2), (i8 imm:$imm))), (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>; -multiclass avx512_valign<X86VectorVTInfo _> { - defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), - "valign"##_.Suffix, - "$src3, $src2, $src1", "$src1, $src2, $src3", - (_.VT (X86VAlign _.RC:$src2, _.RC:$src1, - (i8 imm:$src3)))>, - AVX512AIi8Base, EVEX_4V; - - // Also match valign of packed floats. - def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))), - (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>; - - let mayLoad = 1 in - def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst), - (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), - !strconcat("valign"##_.Suffix, - "\t{$src3, $src2, $src1, $dst|" - "$dst, $src1, $src2, $src3}"), - []>, EVEX_4V; -} -defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - // Helper fragments to match sext vXi1 to vXiY. 
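Throughout the rounding-aware multiclasses above, (i32 FROUND_CURRENT) and (i32 FROUND_NO_EXC) are the immediates that model AVX-512 embedded rounding control and the {sae} (suppress-all-exceptions) form. Their conventional values, assumed here from the intrinsic-level _MM_FROUND_* encoding rather than quoted from this diff:

    // Assumed encoding of the static rounding-control operand.
    enum StaticRounding : unsigned {
      TO_NEAREST_INT = 0, // round to nearest even
      TO_NEG_INF     = 1, // round toward -infinity
      TO_POS_INF     = 2, // round toward +infinity
      TO_ZERO        = 3, // truncate
      CUR_DIRECTION  = 4, // FROUND_CURRENT: use MXCSR.RC, no embedded RC
      NO_EXC         = 8, // FROUND_NO_EXC: the {sae} form
    };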
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>; def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>; @@ -5949,7 +5953,7 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, (_.LdFrag addr:$src))), _.RC:$src0)))]>, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; - + let mayLoad = 1 in def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.MemOp:$src), @@ -5958,7 +5962,6 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, (_.VT (bitconvert (_.LdFrag addr:$src))), _.ImmAllZerosV)))]>, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>; - } multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr, @@ -5979,3 +5982,212 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>, EVEX; defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, EVEX, VEX_W; + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +// op(reg_vec2,broadcast(eltVt),imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + (i8 imm:$src3), + (i32 FROUND_CURRENT))>; + let mayLoad = 1 in { + defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + (i8 imm:$src3), + (i32 FROUND_CURRENT))>; + defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), + OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr##", $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), + (i8 imm:$src3), + (i32 FROUND_CURRENT))>, EVEX_B; + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +// op(reg_vec2,broadcast(eltVt),imm) +multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + (i8 imm:$src3))>; + let mayLoad = 1 in { + defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + (i8 imm:$src3))>; + defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), + OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr##", $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), + (i8 imm:$src3))>, EVEX_B; + } +} + +//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_scalar,imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + + defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), + 
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + (i8 imm:$src3), + (i32 FROUND_CURRENT))>; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT (scalar_to_vector + (_.ScalarLdFrag addr:$src2))), + (i8 imm:$src3), + (i32 FROUND_CURRENT))>; + + let isAsmParserOnly = 1 in { + defm rmi_alt :AVX512_maskable_in_asm<opc, MRMSrcMem, _, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), + OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + []>; + } + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _>{ + defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), + OpcodeStr, "$src3,{sae}, $src2, $src1", + "$src1, $src2,{sae}, $src3", + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + (i8 imm:$src3), + (i32 FROUND_NO_EXC))>, EVEX_B; +} +//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _> { + defm NAME: avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _>; +} + +multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr, + AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>, + avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>, + EVEX_V512; + + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>, + EVEX_V128; + defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>, + EVEX_V256; + } +} + +multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _, + bits<8> opc, SDNode OpNode>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512; + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128; + defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256; + } +} + +multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr, + X86VectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, _>, + avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNode, _>; + } +} + +defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd", + avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VFIXUPIMMPS : avx512_common_fp_sae_packed_imm<"vfixupimmps", + avx512vl_f32_info, 0x54, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + +defm VFIXUPIMMSD: avx512_common_fp_sae_scalar_imm<"vfixupimmsd", f64x_info, + 0x55, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info, + 0x55, X86VFixupimm, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, + 0x50, X86VRange, HasDQI>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VRANGEPS : 
avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, + 0x50, X86VRange, HasDQI>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + +defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info, + 0x51, X86VRange, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, + 0x51, X86VRange, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + + +multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _, + bits<8> opc, SDNode OpNode = X86Shuf128>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512; + + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256; + } +} + +defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; + +multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I, + AVX512VLVectorVTInfo VTInfo_FP>{ + defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>, + AVX512AIi8Base, EVEX_4V; + let isCodeGenOnly = 1 in { + defm NAME#_FP: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0x03, X86VAlign>, + AVX512AIi8Base, EVEX_4V; + } +} + +defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>, + EVEX_CD8<32, CD8VF>; +defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>, + EVEX_CD8<64, CD8VF>, VEX_W; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 331faf2fd0b4..e2fa295c0230 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -764,6 +764,14 @@ class AVX512BIi8Base : PD { Domain ExeDomain = SSEPackedInt; ImmType ImmT = Imm8; } +class AVX512XSIi8Base : XS { + Domain ExeDomain = SSEPackedInt; + ImmType ImmT = Imm8; +} +class AVX512XDIi8Base : XD { + Domain ExeDomain = SSEPackedInt; + ImmType ImmT = Imm8; +} class AVX512PSIi8Base : PS { Domain ExeDomain = SSEPackedSingle; ImmType ImmT = Imm8; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 79d213c6e1a3..dfe58ef8067b 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -35,8 +35,6 @@ def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>; // SSE specific DAG Nodes. 
//===----------------------------------------------------------------------===// -def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, - SDTCisFP<0>, SDTCisInt<2> ]>; def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisFP<1>, SDTCisVT<3, i8>, SDTCisVec<1>]>; @@ -65,7 +63,6 @@ def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; -def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>; def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; @@ -78,6 +75,9 @@ def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>; def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; +def X86psadbw : SDNode<"X86ISD::PSADBW", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; @@ -219,6 +219,8 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisInt<2>]>; def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; +def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>; def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>; @@ -229,6 +231,9 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>; +def SDTFPUnaryOpRound : SDTypeProfile<1, 2, [ // fsqrt_round, fgetexp_round, etc. 
+ SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]>; + def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, @@ -247,7 +252,8 @@ def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>; -def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>; +def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>; +def X86Shuf128 : SDNode<"X86ISD::SHUF128", SDTShuff3OpI>; def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>; def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>; @@ -279,6 +285,9 @@ def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>; def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; +def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>; +def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>; + def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSubVecOfVec<1, 0>]>, []>; @@ -298,6 +307,8 @@ def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; +def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; +def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>; def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 43decf7cdda9..6b7a9299dcfb 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -433,6 +433,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) } static const X86MemoryFoldTableEntry MemoryFoldTable1[] = { + { X86::BSF16rr, X86::BSF16rm, 0 }, + { X86::BSF32rr, X86::BSF32rm, 0 }, + { X86::BSF64rr, X86::BSF64rm, 0 }, + { X86::BSR16rr, X86::BSR16rm, 0 }, + { X86::BSR32rr, X86::BSR32rm, 0 }, + { X86::BSR64rr, X86::BSR64rm, 0 }, { X86::CMP16rr, X86::CMP16rm, 0 }, { X86::CMP32rr, X86::CMP32rm, 0 }, { X86::CMP64rr, X86::CMP64rm, 0 }, @@ -1690,8 +1696,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 }, { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 }, { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 }, - { X86::VALIGNQrri, X86::VALIGNQrmi, 0 }, - { X86::VALIGNDrri, X86::VALIGNDrmi, 0 }, + { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 }, + { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 }, { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 }, { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE }, { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE }, @@ -4697,8 +4703,17 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return false; } +static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs) { + unsigned NumAddrOps = MOs.size(); + for (unsigned i = 0; i != NumAddrOps; ++i) + MIB.addOperand(MOs[i]); + if (NumAddrOps < 4) // FrameIndex only + addOffset(MIB, 0); +} + static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, ArrayRef<MachineOperand> MOs, + MachineBasicBlock::iterator InsertPt, MachineInstr *MI, const TargetInstrInfo &TII) { // Create the base instruction with the memory operand as the first part. 
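Two refactors meet in the X86InstrInfo.cpp hunks that follow: the repeated "copy the memory operands, then pad a bare FrameIndex" sequence is hoisted into the new addOperands helper, and every fold routine now threads a MachineBasicBlock::iterator InsertPt so the fused instruction is inserted into its block right away instead of being returned unparented. The padding exists because a complete x86 memory reference is five operands; a minimal model of the rule (plain C++; the scale/%noreg expansion of addOffset is assumed from X86InstrBuilder conventions):

    #include <vector>

    // Model: a lone FrameIndex operand is completed to a full x86 memory
    // reference: base/FI, scale, index, displacement, segment.
    std::vector<int> completeMemRef(std::vector<int> Ops) {
      if (Ops.size() < 4) {  // the "FrameIndex only" case in addOperands
        Ops.push_back(1);    // scale = 1
        Ops.push_back(0);    // index register = %noreg
        Ops.push_back(0);    // displacement = 0 (what addOffset(MIB, 0) adds)
        Ops.push_back(0);    // segment register = %noreg
      }
      return Ops;
    }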
@@ -4706,11 +4721,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), MI->getDebugLoc(), true); MachineInstrBuilder MIB(MF, NewMI); - unsigned NumAddrOps = MOs.size(); - for (unsigned i = 0; i != NumAddrOps; ++i) - MIB.addOperand(MOs[i]); - if (NumAddrOps < 4) // FrameIndex only - addOffset(MIB, 0); + addOperands(MIB, MOs); // Loop over the rest of the ri operands, converting them over. unsigned NumOps = MI->getDesc().getNumOperands()-2; @@ -4722,11 +4733,16 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, MachineOperand &MO = MI->getOperand(i); MIB.addOperand(MO); } + + MachineBasicBlock *MBB = InsertPt->getParent(); + MBB->insert(InsertPt, NewMI); + return MIB; } static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, ArrayRef<MachineOperand> MOs, + MachineBasicBlock::iterator InsertPt, MachineInstr *MI, const TargetInstrInfo &TII) { // Omit the implicit operands, something BuildMI can't do. MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), @@ -4737,38 +4753,32 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, MachineOperand &MO = MI->getOperand(i); if (i == OpNo) { assert(MO.isReg() && "Expected to fold into reg operand!"); - unsigned NumAddrOps = MOs.size(); - for (unsigned i = 0; i != NumAddrOps; ++i) - MIB.addOperand(MOs[i]); - if (NumAddrOps < 4) // FrameIndex only - addOffset(MIB, 0); + addOperands(MIB, MOs); } else { MIB.addOperand(MO); } } + + MachineBasicBlock *MBB = InsertPt->getParent(); + MBB->insert(InsertPt, NewMI); + return MIB; } static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, ArrayRef<MachineOperand> MOs, + MachineBasicBlock::iterator InsertPt, MachineInstr *MI) { - MachineFunction &MF = *MI->getParent()->getParent(); - MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode)); - - unsigned NumAddrOps = MOs.size(); - for (unsigned i = 0; i != NumAddrOps; ++i) - MIB.addOperand(MOs[i]); - if (NumAddrOps < 4) // FrameIndex only - addOffset(MIB, 0); + MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, + MI->getDebugLoc(), TII.get(Opcode)); + addOperands(MIB, MOs); return MIB.addImm(0); } -MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - unsigned OpNum, - ArrayRef<MachineOperand> MOs, - unsigned Size, unsigned Align, - bool AllowCommute) const { +MachineInstr *X86InstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, unsigned OpNum, + ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt, + unsigned Size, unsigned Align, bool AllowCommute) const { const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr; bool isCallRegIndirect = Subtarget.callRegIndirect(); @@ -4802,7 +4812,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, isTwoAddrFold = true; } else if (OpNum == 0) { if (MI->getOpcode() == X86::MOV32r0) { - NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); + NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI); if (NewMI) return NewMI; } @@ -4847,9 +4857,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } if (isTwoAddrFold) - NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this); + NewMI = FuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this); else - NewMI = FuseInst(MF, Opcode, OpNum, MOs, MI, *this); + NewMI = FuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this); if 
(NarrowToMOV32rm) { // If this is the special case where we use a MOV32rm to load a 32-bit @@ -4901,8 +4911,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Attempt to fold with the commuted version of the instruction. unsigned CommuteOp = (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1); - NewMI = foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, Size, Align, - /*AllowCommute=*/false); + NewMI = + foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, InsertPt, Size, Align, + /*AllowCommute=*/false); if (NewMI) return NewMI; @@ -5131,10 +5142,9 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, MI->addRegisterKilled(Reg, TRI, true); } -MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - int FrameIndex) const { +MachineInstr *X86InstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const { // Check switch flag if (NoFusing) return nullptr; @@ -5173,8 +5183,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return nullptr; return foldMemoryOperandImpl(MF, MI, Ops[0], - MachineOperand::CreateFI(FrameIndex), Size, - Alignment, /*AllowCommute=*/true); + MachineOperand::CreateFI(FrameIndex), InsertPt, + Size, Alignment, /*AllowCommute=*/true); } static bool isPartialRegisterLoad(const MachineInstr &LoadMI, @@ -5196,17 +5206,16 @@ static bool isPartialRegisterLoad(const MachineInstr &LoadMI, return false; } -MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - ArrayRef<unsigned> Ops, - MachineInstr *LoadMI) const { +MachineInstr *X86InstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const { // If loading from a FrameIndex, fold directly from the FrameIndex. unsigned NumOps = LoadMI->getDesc().getNumOperands(); int FrameIndex; if (isLoadFromStackSlot(LoadMI, FrameIndex)) { if (isPartialRegisterLoad(*LoadMI, MF)) return nullptr; - return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex); + return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex); } // Check switch flag @@ -5326,7 +5335,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, break; } } - return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, + return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt, /*Size=*/0, Alignment, /*AllowCommute=*/true); } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 0dd8101bbe5b..ac1b2d4fedc6 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -307,6 +307,7 @@ public: /// references has been changed. MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex) const override; /// foldMemoryOperand - Same as the previous version except it allows folding @@ -314,6 +315,7 @@ public: /// stack slot. 
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const override; /// canFoldMemoryOperand - Returns true if the specified load / store is @@ -407,6 +409,7 @@ public: MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned OpNum, ArrayRef<MachineOperand> MOs, + MachineBasicBlock::iterator InsertPt, unsigned Size, unsigned Alignment, bool AllowCommute) const; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 70c2027520f9..e936b4bc466e 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -788,6 +788,7 @@ def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">; def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; +def HasMPX : Predicate<"Subtarget->hasMPX()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">; @@ -2456,6 +2457,9 @@ include "X86InstrAVX512.td" include "X86InstrMMX.td" include "X86Instr3DNow.td" +// MPX instructions +include "X86InstrMPX.td" + include "X86InstrVMX.td" include "X86InstrSVM.td" diff --git a/lib/Target/X86/X86InstrMPX.td b/lib/Target/X86/X86InstrMPX.td new file mode 100644 index 000000000000..cf5e2e38fe58 --- /dev/null +++ b/lib/Target/X86/X86InstrMPX.td @@ -0,0 +1,70 @@ +//===-- X86InstrMPX.td - MPX Instruction Set ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 MPX instruction set, defining the +// instructions, and properties of the instructions which are needed for code +// generation, machine code emission, and analysis. 
+// +//===----------------------------------------------------------------------===// + +multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> { + def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins i32mem:$src), + OpcodeStr#" \t{$src, $dst|$dst, $src}", []>, + Requires<[HasMPX, Not64BitMode]>; + def 64rm: RI<opc, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), + OpcodeStr#" \t{$src, $dst|$dst, $src}", []>, + Requires<[HasMPX, In64BitMode]>; +} + +defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS; + +multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> { + def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i32mem:$src2), + OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + Requires<[HasMPX, Not64BitMode]>; + def 64rm: RI<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i64mem:$src2), + OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + Requires<[HasMPX, In64BitMode]>; + def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2), + OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + Requires<[HasMPX, Not64BitMode]>; + def 64rr: RI<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2), + OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>, + Requires<[HasMPX, In64BitMode]>; +} +defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS; +defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD; +defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD; + +def BNDMOVRMrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src), + "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + Requires<[HasMPX]>; +def BNDMOVRM32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), + "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + Requires<[HasMPX, Not64BitMode]>; +def BNDMOVRM64rm : RI<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src), + "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + Requires<[HasMPX, In64BitMode]>; + +def BNDMOVMRrr : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src), + "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + Requires<[HasMPX]>; +def BNDMOVMR32mr : I<0x1B, MRMDestMem, (outs i64mem:$dst), (ins BNDR:$src), + "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + Requires<[HasMPX, Not64BitMode]>; +def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs i128mem:$dst), (ins BNDR:$src), + "bndmov \t{$src, $dst|$dst, $src}", []>, PD, + Requires<[HasMPX, In64BitMode]>; + +def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src), + "bndstx \t{$src, $dst|$dst, $src}", []>, TB, + Requires<[HasMPX]>; +def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), + "bndldx \t{$src, $dst|$dst, $src}", []>, TB, + Requires<[HasMPX]>;
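A rough C++ model of what the bound-make and bound-check instructions defined above compute (an illustrative sketch, not LLVM code: the hardware keeps bounds in the 128-bit BND registers and raises #BR on a failed check, with bndmk conventionally described as storing the upper bound in one's-complement form):

#include <cstdint>

// Illustrative model of a BND register as bndmk writes it: the lower bound
// plus the upper bound stored in inverted (one's-complement) form.
struct BndReg {
  uint64_t LB;    // lower bound
  uint64_t UBInv; // ~upper bound
};

// bndcl faults when Addr < LB; bndcu faults when Addr > ~UBInv.
inline void boundCheck(const BndReg &B, uint64_t Addr) {
  if (Addr < B.LB || Addr > ~B.UBInv)
    __builtin_trap(); // stand-in for the hardware #BR exception
}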
\ No newline at end of file diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index d3b401e8cfcb..8294e38e9957 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3560,7 +3560,7 @@ multiclass scalar_unary_math_patterns<Intrinsic Intr, string OpcPrefix, let Predicates = [HasAVX] in { def : Pat<(VT (Move VT:$dst, (Intr VT:$src))), (!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>; - + def : Pat<(VT (X86Blendi VT:$dst, (Intr VT:$src), (i8 1))), (!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>; } @@ -4053,6 +4053,20 @@ defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w, defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, int_x86_avx2_psad_bw, SSE_PMADD, 1>; +let Predicates = [HasAVX2] in + def : Pat<(v32i8 (X86psadbw (v32i8 VR256:$src1), + (v32i8 VR256:$src2))), + (VPSADBWYrr VR256:$src2, VR256:$src1)>; + +let Predicates = [HasAVX] in + def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1), + (v16i8 VR128:$src2))), + (VPSADBWrr VR128:$src2, VR128:$src1)>; + +def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1), + (v16i8 VR128:$src2))), + (PSADBWrr VR128:$src2, VR128:$src1)>; + let Predicates = [HasAVX] in defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>, @@ -4207,16 +4221,6 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { } } // Constraints = "$src1 = $dst" -let Predicates = [HasAVX] in { - def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), - (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; -} - -let Predicates = [UseSSE2] in { - def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), - (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; -} - //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Comparison Instructions //===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 4af514a83ca5..0268066c2ba1 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -21,7 +21,8 @@ enum IntrinsicType { GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, - INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK, + INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, + INTR_TYPE_3OP_MASK, FMA_OP_MASK, INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM, BLEND }; @@ -339,9 +340,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV, X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV, - X86ISD::FDIV_RND), + X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV, - X86ISD::FDIV_RND), + X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG, X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG, @@ -366,6 +367,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG, X86ISD::EXPAND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, 
INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_pd_512, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX, @@ -559,6 +572,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::RNDSCALE, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RNDSCALE, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT, + X86ISD::FSQRT_RND), + X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT, + X86ISD::FSQRT_RND), X86_INTRINSIC_DATA(avx512_mask_sub_pd_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0), X86_INTRINSIC_DATA(avx512_mask_sub_pd_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0), X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB, @@ -583,6 +604,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0), + X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0), + X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0), X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0), diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 556b518936f3..ff1436af4ece 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -128,6 +128,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { const DataLayout *DL = TM.getDataLayout(); assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference"); + MCSymbol *Sym = nullptr; SmallString<128> Name; StringRef Suffix; @@ -160,12 +161,14 @@ GetSymbolFromOperand(const MachineOperand &MO) const { else getMang()->getNameWithPrefix(Name, MO.getSymbolName()); } else if (MO.isMBB()) { - Name += MO.getMBB()->getSymbol()->getName(); + assert(Suffix.empty()); + Sym = MO.getMBB()->getSymbol(); } unsigned OrigLen = Name.size() - PrefixLen; Name += Suffix; - MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); + if (!Sym) + Sym = Ctx.getOrCreateSymbol(Name); StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen); @@ -240,10 +243,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; case X86II::MO_TLVP_PIC_BASE: - Expr = MCSymbolRefExpr::Create(Sym, 
MCSymbolRefExpr::VK_TLVP, Ctx); + Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); // Subtract the pic base. - Expr = MCBinaryExpr::CreateSub(Expr, - MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), + Expr = MCBinaryExpr::createSub(Expr, + MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); break; @@ -264,10 +267,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_PIC_BASE_OFFSET: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - Expr = MCSymbolRefExpr::Create(Sym, Ctx); + Expr = MCSymbolRefExpr::create(Sym, Ctx); // Subtract the pic base. - Expr = MCBinaryExpr::CreateSub(Expr, - MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx), + Expr = MCBinaryExpr::createSub(Expr, + MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); if (MO.isJTI()) { assert(MAI.doesSetDirectiveSuppressesReloc()); @@ -277,17 +280,17 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, // section so we are restricting it to jumptable references. MCSymbol *Label = Ctx.createTempSymbol(); AsmPrinter.OutStreamer->EmitAssignment(Label, Expr); - Expr = MCSymbolRefExpr::Create(Label, Ctx); + Expr = MCSymbolRefExpr::create(Label, Ctx); } break; } if (!Expr) - Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); + Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(MO.getOffset(), Ctx), + Expr = MCBinaryExpr::createAdd(Expr, + MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); return MCOperand::createExpr(Expr); } @@ -710,7 +713,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, } MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)); - const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context); + const MCSymbolRefExpr *symRef = MCSymbolRefExpr::create(sym, SRVK, context); MCInst LEA; if (is64Bits) { @@ -749,7 +752,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr"; MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name); const MCSymbolRefExpr *tlsRef = - MCSymbolRefExpr::Create(tlsGetAddr, + MCSymbolRefExpr::create(tlsGetAddr, MCSymbolRefExpr::VK_PLT, context); @@ -1071,7 +1074,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32) - .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); + .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); // Emit the label. OutStreamer->EmitLabel(PICBase); @@ -1100,12 +1103,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // Now that we have emitted the label, lower the complex operand expression. 
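  // (An aside on the shape being built below: the final operand expression is
  //   OpSym + (DotSym - PICBase)
  // i.e. the symbol's address made relative to the PIC base label, spelled
  // with the lowercased MCSymbolRefExpr::create / MCBinaryExpr::create*
  // factories that this diff switches to throughout.)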
MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); - const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); + const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext); const MCExpr *PICBase = - MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext); - DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext); + MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext); + DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext); - DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), + DotExpr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(OpSym,OutContext), DotExpr, OutContext); EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 1f361631a0b7..e9b6bfc3273c 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -175,12 +175,12 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, return &X86::GR64_NOSPRegClass; return &X86::GR32_NOSPRegClass; case 2: // Available for tailcall (not callee-saved GPRs). - if (IsWin64) + const Function *F = MF.getFunction(); + if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64)) return &X86::GR64_TCW64RegClass; else if (Is64Bit) return &X86::GR64_TCRegClass; - const Function *F = MF.getFunction(); bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false); if (hasHipeCC) return &X86::GR32RegClass; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 2e735fa3c026..cdb151c26a05 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -302,6 +302,11 @@ def CR15 : X86Reg<"cr15", 15>; def EIZ : X86Reg<"eiz", 4>; def RIZ : X86Reg<"riz", 4>; +// Bound registers, used in MPX instructions +def BND0 : X86Reg<"bnd0", 0>; +def BND1 : X86Reg<"bnd1", 1>; +def BND2 : X86Reg<"bnd2", 2>; +def BND3 : X86Reg<"bnd3", 3>; //===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the @@ -484,3 +489,6 @@ def VK8WM : RegisterClass<"X86", [v8i1], 8, (sub VK8, K0)> {let Size = 8;} def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;} def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;} def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;} + +// Bound registers +def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
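The BNDR class above models BND0..BND3 as 128-bit v2i64 registers, and the MPX instructions in this import are all gated on the HasMPX predicate. A minimal sketch of what a Requires<[HasMPX, In64BitMode]> clause boils down to once lowered to C++ (the function name here is hypothetical):

// What the TableGen predicate amounts to in subtarget terms.
static bool canSelectBNDMK64rm(const X86Subtarget &ST, bool In64BitMode) {
  return ST.hasMPX() && In64BitMode;
}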
\ No newline at end of file diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 1cdab14e034e..74af29f4566c 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -259,6 +259,7 @@ void X86Subtarget::initializeEnvironment() { HasSHA = false; HasPRFCHW = false; HasRDSEED = false; + HasMPX = false; IsBTMemSlow = false; IsSHLDSlow = false; IsUAMemFast = false; @@ -273,8 +274,6 @@ void X86Subtarget::initializeEnvironment() { LEAUsesAG = false; SlowLEA = false; SlowIncDec = false; - UseSqrtEst = false; - UseReciprocalEst = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 455dd7744d73..a476f7aba932 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -190,16 +190,6 @@ protected: /// True if INC and DEC instructions are slow when writing to flags bool SlowIncDec; - /// Use the RSQRT* instructions to optimize square root calculations. - /// For this to be profitable, the cost of FSQRT and FDIV must be - /// substantially higher than normal FP ops like FADD and FMUL. - bool UseSqrtEst; - - /// Use the RCP* instructions to optimize FP division calculations. - /// For this to be profitable, the cost of FDIV must be - /// substantially higher than normal FP ops like FADD and FMUL. - bool UseReciprocalEst; - /// Processor has AVX-512 PreFetch Instructions bool HasPFI; @@ -218,6 +208,9 @@ protected: /// Processor has AVX-512 Vector Length eXtensions bool HasVLX; + /// Processor supports MPX - Memory Protection Extensions + bool HasMPX; + /// Use software floating point for code generation. bool UseSoftFloat; @@ -377,14 +370,13 @@ public: bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } bool slowIncDec() const { return SlowIncDec; } - bool useSqrtEst() const { return UseSqrtEst; } - bool useReciprocalEst() const { return UseReciprocalEst; } bool hasCDI() const { return HasCDI; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } bool hasDQI() const { return HasDQI; } bool hasBWI() const { return HasBWI; } bool hasVLX() const { return HasVLX; } + bool hasMPX() const { return HasMPX; } bool isAtom() const { return X86ProcFamily == IntelAtom; } bool isSLM() const { return X86ProcFamily == IntelSLM; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 3e5f1d82202f..646cff7c5bdb 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -105,6 +105,13 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, if (Subtarget.isTargetWin64()) this->Options.TrapUnreachable = true; + // TODO: By default, all reciprocal estimate operations are off because + // that matches the behavior before TargetRecip was added (except for btver2 + // which used subtarget features to enable this type of codegen). + // We should change this to match GCC behavior where everything but + // scalar division estimates are turned on by default with -ffast-math. + this->Options.Reciprocals.setDefaults("all", false, 1); + initAsmInfo(); } @@ -221,9 +228,9 @@ bool X86PassConfig::addILPOpts() { } bool X86PassConfig::addPreISel() { - // Only add this pass for 32-bit x86. + // Only add this pass for 32-bit x86 Windows. 
Triple TT(TM->getTargetTriple()); - if (TT.getArch() == Triple::x86) + if (TT.isOSWindows() && TT.getArch() == Triple::x86) addPass(createX86WinEHStatePass()); return true; } diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 6bf45c37e38b..f9f62904b64b 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -32,9 +32,9 @@ const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference( if ((Encoding & DW_EH_PE_indirect) && (Encoding & DW_EH_PE_pcrel)) { const MCSymbol *Sym = TM.getSymbol(GV, Mang); const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); - const MCExpr *Four = MCConstantExpr::Create(4, getContext()); - return MCBinaryExpr::CreateAdd(Res, Four, getContext()); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); + const MCExpr *Four = MCConstantExpr::create(4, getContext()); + return MCBinaryExpr::createAdd(Res, Four, getContext()); } return TargetLoweringObjectFileMachO::getTTypeGlobalReference( @@ -55,14 +55,14 @@ const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel( // foo@GOTPCREL+4+<offset>. unsigned FinalOff = Offset+MV.getConstant()+4; const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); - const MCExpr *Off = MCConstantExpr::Create(FinalOff, getContext()); - return MCBinaryExpr::CreateAdd(Res, Off, getContext()); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); + const MCExpr *Off = MCConstantExpr::create(FinalOff, getContext()); + return MCBinaryExpr::createAdd(Res, Off, getContext()); } const MCExpr *X86ELFTargetObjectFile::getDebugThreadLocalSymbol( const MCSymbol *Sym) const { - return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext()); + return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext()); } void @@ -116,7 +116,7 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol( if (GOLHS->isThreadLocal()) return nullptr; - return MCSymbolRefExpr::Create(TM.getSymbol(GOLHS, Mang), + return MCSymbolRefExpr::create(TM.getSymbol(GOLHS, Mang), MCSymbolRefExpr::VK_COFF_IMGREL32, getContext()); } diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index 4efaada40926..ce69ea721993 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -16,6 +16,7 @@ #include "X86.h" #include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Dominators.h" @@ -59,30 +60,49 @@ public: private: void emitExceptionRegistrationRecord(Function *F); - void linkExceptionRegistration(IRBuilder<> &Builder, Value *RegNode, - Value *Handler); - void unlinkExceptionRegistration(IRBuilder<> &Builder, Value *RegNode); + void linkExceptionRegistration(IRBuilder<> &Builder, Value *Handler); + void unlinkExceptionRegistration(IRBuilder<> &Builder); + void addCXXStateStores(Function &F, MachineModuleInfo &MMI); + void addCXXStateStoresToFunclet(Value *ParentRegNode, WinEHFuncInfo &FuncInfo, + Function &F, int BaseState); + void insertStateNumberStore(Value *ParentRegNode, Instruction *IP, int State); Value *emitEHLSDA(IRBuilder<> &Builder, Function *F); Function *generateLSDAInEAXThunk(Function *ParentFunc); + int escapeRegNode(Function &F); + // Module-level type getters. 
- Type *getEHRegistrationType(); - Type *getSEH3RegistrationType(); - Type *getSEH4RegistrationType(); - Type *getCXXEH3RegistrationType(); + Type *getEHLinkRegistrationType(); + Type *getSEHRegistrationType(); + Type *getCXXEHRegistrationType(); // Per-module data. Module *TheModule = nullptr; - StructType *EHRegistrationTy = nullptr; - StructType *CXXEH3RegistrationTy = nullptr; - StructType *SEH3RegistrationTy = nullptr; - StructType *SEH4RegistrationTy = nullptr; + StructType *EHLinkRegistrationTy = nullptr; + StructType *CXXEHRegistrationTy = nullptr; + StructType *SEHRegistrationTy = nullptr; + Function *FrameRecover = nullptr; + Function *FrameAddress = nullptr; + Function *FrameEscape = nullptr; // Per-function state EHPersonality Personality = EHPersonality::Unknown; Function *PersonalityFn = nullptr; + + /// The stack allocation containing all EH data, including the link in the + /// fs:00 chain and the current state. + AllocaInst *RegNode = nullptr; + + /// Struct type of RegNode. Used for GEPing. + Type *RegNodeTy = nullptr; + + /// The index of the state field of RegNode. + int StateFieldIndex = ~0U; + + /// The linked list node subobject inside of RegNode. + Value *Link = nullptr; }; } @@ -92,16 +112,21 @@ char WinEHStatePass::ID = 0; bool WinEHStatePass::doInitialization(Module &M) { TheModule = &M; + FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::frameescape); + FrameRecover = Intrinsic::getDeclaration(TheModule, Intrinsic::framerecover); + FrameAddress = Intrinsic::getDeclaration(TheModule, Intrinsic::frameaddress); return false; } bool WinEHStatePass::doFinalization(Module &M) { assert(TheModule == &M); TheModule = nullptr; - EHRegistrationTy = nullptr; - CXXEH3RegistrationTy = nullptr; - SEH3RegistrationTy = nullptr; - SEH4RegistrationTy = nullptr; + EHLinkRegistrationTy = nullptr; + CXXEHRegistrationTy = nullptr; + SEHRegistrationTy = nullptr; + FrameEscape = nullptr; + FrameRecover = nullptr; + FrameAddress = nullptr; return false; } @@ -136,8 +161,19 @@ bool WinEHStatePass::runOnFunction(Function &F) { if (!isMSVCEHPersonality(Personality)) return false; + // Disable frame pointer elimination in this function. + // FIXME: Do the nested handlers need to keep the parent ebp in ebp, or can we + // use an arbitrary register? + F.addFnAttr("no-frame-pointer-elim", "true"); + emitExceptionRegistrationRecord(&F); - // FIXME: State insertion. + + auto *MMIPtr = getAnalysisIfAvailable<MachineModuleInfo>(); + assert(MMIPtr && "MachineModuleInfo should always be available"); + MachineModuleInfo &MMI = *MMIPtr; + if (Personality == EHPersonality::MSVC_CXX) { + addCXXStateStores(F, MMI); + } // Reset per-function state. PersonalityFn = nullptr; @@ -152,17 +188,17 @@ bool WinEHStatePass::runOnFunction(Function &F) { /// EHRegistrationNode *Next; /// PEXCEPTION_ROUTINE Handler; /// }; -Type *WinEHStatePass::getEHRegistrationType() { - if (EHRegistrationTy) - return EHRegistrationTy; +Type *WinEHStatePass::getEHLinkRegistrationType() { + if (EHLinkRegistrationTy) + return EHLinkRegistrationTy; LLVMContext &Context = TheModule->getContext(); - EHRegistrationTy = StructType::create(Context, "EHRegistrationNode"); + EHLinkRegistrationTy = StructType::create(Context, "EHRegistrationNode"); Type *FieldTys[] = { - EHRegistrationTy->getPointerTo(0), // EHRegistrationNode *Next + EHLinkRegistrationTy->getPointerTo(0), // EHRegistrationNode *Next Type::getInt8PtrTy(Context) // EXCEPTION_DISPOSITION (*Handler)(...) 
}; - EHRegistrationTy->setBody(FieldTys, false); - return EHRegistrationTy; + EHLinkRegistrationTy->setBody(FieldTys, false); + return EHLinkRegistrationTy; } /// The __CxxFrameHandler3 registration node: @@ -171,40 +207,21 @@ Type *WinEHStatePass::getEHRegistrationType() { /// EHRegistrationNode SubRecord; /// int32_t TryLevel; /// }; -Type *WinEHStatePass::getCXXEH3RegistrationType() { - if (CXXEH3RegistrationTy) - return CXXEH3RegistrationTy; +Type *WinEHStatePass::getCXXEHRegistrationType() { + if (CXXEHRegistrationTy) + return CXXEHRegistrationTy; LLVMContext &Context = TheModule->getContext(); Type *FieldTys[] = { Type::getInt8PtrTy(Context), // void *SavedESP - getEHRegistrationType(), // EHRegistrationNode SubRecord + getEHLinkRegistrationType(), // EHRegistrationNode SubRecord Type::getInt32Ty(Context) // int32_t TryLevel }; - CXXEH3RegistrationTy = + CXXEHRegistrationTy = StructType::create(FieldTys, "CXXExceptionRegistration"); - return CXXEH3RegistrationTy; -} - -/// The _except_handler3 registration node: -/// struct EH3ExceptionRegistration { -/// EHRegistrationNode SubRecord; -/// void *ScopeTable; -/// int32_t TryLevel; -/// }; -Type *WinEHStatePass::getSEH3RegistrationType() { - if (SEH3RegistrationTy) - return SEH3RegistrationTy; - LLVMContext &Context = TheModule->getContext(); - Type *FieldTys[] = { - getEHRegistrationType(), // EHRegistrationNode SubRecord - Type::getInt8PtrTy(Context), // void *ScopeTable - Type::getInt32Ty(Context) // int32_t TryLevel - }; - SEH3RegistrationTy = StructType::create(FieldTys, "EH3ExceptionRegistration"); - return SEH3RegistrationTy; + return CXXEHRegistrationTy; } -/// The _except_handler4 registration node: +/// The _except_handler3/4 registration node: /// struct EH4ExceptionRegistration { /// void *SavedESP; /// _EXCEPTION_POINTERS *ExceptionPointers; @@ -212,19 +229,19 @@ Type *WinEHStatePass::getSEH3RegistrationType() { /// int32_t EncodedScopeTable; /// int32_t TryLevel; /// }; -Type *WinEHStatePass::getSEH4RegistrationType() { - if (SEH4RegistrationTy) - return SEH4RegistrationTy; +Type *WinEHStatePass::getSEHRegistrationType() { + if (SEHRegistrationTy) + return SEHRegistrationTy; LLVMContext &Context = TheModule->getContext(); Type *FieldTys[] = { Type::getInt8PtrTy(Context), // void *SavedESP Type::getInt8PtrTy(Context), // void *ExceptionPointers - getEHRegistrationType(), // EHRegistrationNode SubRecord + getEHLinkRegistrationType(), // EHRegistrationNode SubRecord Type::getInt32Ty(Context), // int32_t EncodedScopeTable Type::getInt32Ty(Context) // int32_t TryLevel }; - SEH4RegistrationTy = StructType::create(FieldTys, "EH4ExceptionRegistration"); - return SEH4RegistrationTy; + SEHRegistrationTy = StructType::create(FieldTys, "SEHExceptionRegistration"); + return SEHRegistrationTy; } // Emit an exception registration record. These are stack allocations with the @@ -238,62 +255,63 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) { StringRef PersonalityName = PersonalityFn->getName(); IRBuilder<> Builder(&F->getEntryBlock(), F->getEntryBlock().begin()); Type *Int8PtrType = Builder.getInt8PtrTy(); - Value *SubRecord = nullptr; - if (PersonalityName == "__CxxFrameHandler3") { - Type *RegNodeTy = getCXXEH3RegistrationType(); - Value *RegNode = Builder.CreateAlloca(RegNodeTy); + if (Personality == EHPersonality::MSVC_CXX) { + RegNodeTy = getCXXEHRegistrationType(); + RegNode = Builder.CreateAlloca(RegNodeTy); // FIXME: We can skip this in -GS- mode, when we figure that out. 
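    // (For orientation, the object allocated here is the
    // CXXExceptionRegistration declared earlier in this diff:
    //   field 0: void *SavedESP     : written from llvm.stacksave below
    //   field 1: EHRegistrationNode : the link threaded into the fs:[0] chain
    //   field 2: int32_t TryLevel   : the EH state, initialized to -1.)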
// SavedESP = llvm.stacksave() Value *SP = Builder.CreateCall( Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {}); Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); // TryLevel = -1 - Builder.CreateStore(Builder.getInt32(-1), - Builder.CreateStructGEP(RegNodeTy, RegNode, 2)); + StateFieldIndex = 2; + insertStateNumberStore(RegNode, Builder.GetInsertPoint(), -1); // Handler = __ehhandler$F Function *Trampoline = generateLSDAInEAXThunk(F); - SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 1); - linkExceptionRegistration(Builder, SubRecord, Trampoline); - } else if (PersonalityName == "_except_handler3") { - Type *RegNodeTy = getSEH3RegistrationType(); - Value *RegNode = Builder.CreateAlloca(RegNodeTy); - // TryLevel = -1 - Builder.CreateStore(Builder.getInt32(-1), - Builder.CreateStructGEP(RegNodeTy, RegNode, 2)); - // ScopeTable = llvm.x86.seh.lsda(F) - Value *LSDA = emitEHLSDA(Builder, F); - Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 1)); - SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 0); - linkExceptionRegistration(Builder, SubRecord, PersonalityFn); - } else if (PersonalityName == "_except_handler4") { - Type *RegNodeTy = getSEH4RegistrationType(); - Value *RegNode = Builder.CreateAlloca(RegNodeTy); + Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 1); + linkExceptionRegistration(Builder, Trampoline); + } else if (Personality == EHPersonality::MSVC_X86SEH) { + // If _except_handler4 is in use, some additional guard checks and prologue + // stuff is required. + bool UseStackGuard = (PersonalityName == "_except_handler4"); + RegNodeTy = getSEHRegistrationType(); + RegNode = Builder.CreateAlloca(RegNodeTy); // SavedESP = llvm.stacksave() Value *SP = Builder.CreateCall( Intrinsic::getDeclaration(TheModule, Intrinsic::stacksave), {}); Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); - // TryLevel = -2 - Builder.CreateStore(Builder.getInt32(-2), - Builder.CreateStructGEP(RegNodeTy, RegNode, 4)); - // FIXME: XOR the LSDA with __security_cookie. + // TryLevel = -2 / -1 + StateFieldIndex = 4; + insertStateNumberStore(RegNode, Builder.GetInsertPoint(), + UseStackGuard ? -2 : -1); // ScopeTable = llvm.x86.seh.lsda(F) Value *FI8 = Builder.CreateBitCast(F, Int8PtrType); Value *LSDA = Builder.CreateCall( Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_lsda), FI8); - Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 1)); - SubRecord = Builder.CreateStructGEP(RegNodeTy, RegNode, 2); - linkExceptionRegistration(Builder, SubRecord, PersonalityFn); + Type *Int32Ty = Type::getInt32Ty(TheModule->getContext()); + LSDA = Builder.CreatePtrToInt(LSDA, Int32Ty); + // If using _except_handler4, xor the address of the table with + // __security_cookie. + if (UseStackGuard) { + Value *Cookie = + TheModule->getOrInsertGlobal("__security_cookie", Int32Ty); + Value *Val = Builder.CreateLoad(Int32Ty, Cookie); + LSDA = Builder.CreateXor(LSDA, Val); + } + Builder.CreateStore(LSDA, Builder.CreateStructGEP(RegNodeTy, RegNode, 3)); + Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 2); + linkExceptionRegistration(Builder, PersonalityFn); } else { llvm_unreachable("unexpected personality function"); } - // FIXME: Insert an unlink before all returns. + // Insert an unlink before all returns. 
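  // (The registration records form a singly linked list headed at fs:[0].
  // Linking pushed this frame's node:  Link->Next = [fs:0]; [fs:0] = Link;
  // the unlink emitted below pops it again:  [fs:0] = Link->Next.)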
for (BasicBlock &BB : *F) { TerminatorInst *T = BB.getTerminator(); if (!isa<ReturnInst>(T)) continue; Builder.SetInsertPoint(T); - unlinkExceptionRegistration(Builder, SubRecord); + unlinkExceptionRegistration(Builder); } } @@ -342,33 +360,122 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) { } void WinEHStatePass::linkExceptionRegistration(IRBuilder<> &Builder, - Value *RegNode, Value *Handler) { - Type *RegNodeTy = getEHRegistrationType(); + Value *Handler) { + Type *LinkTy = getEHLinkRegistrationType(); // Handler = Handler Handler = Builder.CreateBitCast(Handler, Builder.getInt8PtrTy()); - Builder.CreateStore(Handler, Builder.CreateStructGEP(RegNodeTy, RegNode, 1)); + Builder.CreateStore(Handler, Builder.CreateStructGEP(LinkTy, Link, 1)); // Next = [fs:00] Constant *FSZero = - Constant::getNullValue(RegNodeTy->getPointerTo()->getPointerTo(257)); + Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257)); Value *Next = Builder.CreateLoad(FSZero); - Builder.CreateStore(Next, Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); - // [fs:00] = RegNode - Builder.CreateStore(RegNode, FSZero); + Builder.CreateStore(Next, Builder.CreateStructGEP(LinkTy, Link, 0)); + // [fs:00] = Link + Builder.CreateStore(Link, FSZero); } -void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder, - Value *RegNode) { - // Clone RegNode into the current BB for better address mode folding. - if (auto *GEP = dyn_cast<GetElementPtrInst>(RegNode)) { +void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) { + // Clone Link into the current BB for better address mode folding. + if (auto *GEP = dyn_cast<GetElementPtrInst>(Link)) { GEP = cast<GetElementPtrInst>(GEP->clone()); Builder.Insert(GEP); - RegNode = GEP; + Link = GEP; } - Type *RegNodeTy = getEHRegistrationType(); - // [fs:00] = RegNode->Next + Type *LinkTy = getEHLinkRegistrationType(); + // [fs:00] = Link->Next Value *Next = - Builder.CreateLoad(Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); + Builder.CreateLoad(Builder.CreateStructGEP(LinkTy, Link, 0)); Constant *FSZero = - Constant::getNullValue(RegNodeTy->getPointerTo()->getPointerTo(257)); + Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257)); Builder.CreateStore(Next, FSZero); } + +void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) { + WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F); + calculateWinCXXEHStateNumbers(&F, FuncInfo); + + // The base state for the parent is -1. + addCXXStateStoresToFunclet(RegNode, FuncInfo, F, -1); + + // Set up RegNodeEscapeIndex + int RegNodeEscapeIndex = escapeRegNode(F); + + // Only insert stores in catch handlers. + Constant *FI8 = + ConstantExpr::getBitCast(&F, Type::getInt8PtrTy(TheModule->getContext())); + for (auto P : FuncInfo.HandlerBaseState) { + Function *Handler = const_cast<Function *>(P.first); + int BaseState = P.second; + IRBuilder<> Builder(&Handler->getEntryBlock(), + Handler->getEntryBlock().begin()); + // FIXME: Find and reuse such a call if present. + Value *ParentFP = Builder.CreateCall(FrameAddress, {Builder.getInt32(1)}); + Value *RecoveredRegNode = Builder.CreateCall( + FrameRecover, {FI8, ParentFP, Builder.getInt32(RegNodeEscapeIndex)}); + RecoveredRegNode = + Builder.CreateBitCast(RecoveredRegNode, RegNodeTy->getPointerTo(0)); + addCXXStateStoresToFunclet(RecoveredRegNode, FuncInfo, *Handler, BaseState); + } +} + +/// Escape RegNode so that we can access it from child handlers. 
Find the call +/// to frameescape, if any, in the entry block and append RegNode to the list +/// of arguments. +int WinEHStatePass::escapeRegNode(Function &F) { + // Find the call to frameescape and extract its arguments. + IntrinsicInst *EscapeCall = nullptr; + for (Instruction &I : F.getEntryBlock()) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); + if (II && II->getIntrinsicID() == Intrinsic::frameescape) { + EscapeCall = II; + break; + } + } + SmallVector<Value *, 8> Args; + if (EscapeCall) { + auto Ops = EscapeCall->arg_operands(); + Args.append(Ops.begin(), Ops.end()); + } + Args.push_back(RegNode); + + // Replace the call (if it exists) with new one. Otherwise, insert at the end + // of the entry block. + IRBuilder<> Builder(&F.getEntryBlock(), + EscapeCall ? EscapeCall : F.getEntryBlock().end()); + Builder.CreateCall(FrameEscape, Args); + if (EscapeCall) + EscapeCall->eraseFromParent(); + return Args.size() - 1; +} + +void WinEHStatePass::addCXXStateStoresToFunclet(Value *ParentRegNode, + WinEHFuncInfo &FuncInfo, + Function &F, int BaseState) { + // Iterate all the instructions and emit state number stores. + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (auto *CI = dyn_cast<CallInst>(&I)) { + // Possibly throwing call instructions have no actions to take after + // an unwind. Ensure they are in the -1 state. + if (CI->doesNotThrow()) + continue; + insertStateNumberStore(ParentRegNode, CI, BaseState); + } else if (auto *II = dyn_cast<InvokeInst>(&I)) { + // Look up the state number of the landingpad this unwinds to. + LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst(); + // FIXME: Why does this assertion fail? + //assert(FuncInfo.LandingPadStateMap.count(LPI) && "LP has no state!"); + int State = FuncInfo.LandingPadStateMap[LPI]; + insertStateNumberStore(ParentRegNode, II, State); + } + } + } +} + +void WinEHStatePass::insertStateNumberStore(Value *ParentRegNode, + Instruction *IP, int State) { + IRBuilder<> Builder(IP); + Value *StateField = + Builder.CreateStructGEP(RegNodeTy, ParentRegNode, StateFieldIndex); + Builder.CreateStore(Builder.getInt32(State), StateField); +} diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp index 36b3b02a707a..500c84d2a418 100644 --- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp @@ -45,7 +45,8 @@ printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O) { report_fatal_error("can't handle InlineJT32"); } -static void printExpr(const MCExpr *Expr, raw_ostream &OS) { +static void printExpr(const MCExpr *Expr, const MCAsmInfo *MAI, + raw_ostream &OS) { int Offset = 0; const MCSymbolRefExpr *SRE; @@ -60,7 +61,7 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { } assert(SRE->getKind() == MCSymbolRefExpr::VK_None); - OS << SRE->getSymbol(); + SRE->getSymbol().print(OS, MAI); if (Offset) { if (Offset > 0) @@ -83,5 +84,5 @@ printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { } assert(Op.isExpr() && "unknown operand kind in printOperand"); - printExpr(Op.getExpr(), O); + printExpr(Op.getExpr(), &MAI, O); } diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp index f2d2b37d6f21..3178a4edbb3b 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp @@ -8,12 +8,11 @@ //===----------------------------------------------------------------------===// #include 
"XCoreMCAsmInfo.h" -#include "llvm/ADT/StringRef.h" using namespace llvm; void XCoreMCAsmInfo::anchor() { } -XCoreMCAsmInfo::XCoreMCAsmInfo(StringRef TT) { +XCoreMCAsmInfo::XCoreMCAsmInfo(const Triple &TT) { SupportsDebugInformation = true; Data16bitsDirective = "\t.short\t"; Data32bitsDirective = "\t.long\t"; diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h index 26df211eecee..39581e424e8c 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h @@ -17,14 +17,14 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class StringRef; - class Target; +class Triple; - class XCoreMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit XCoreMCAsmInfo(StringRef TT); - }; +class XCoreMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit XCoreMCAsmInfo(const Triple &TT); +}; } // namespace llvm diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index ce0d39fe407f..f0e459620c9c 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -54,7 +54,7 @@ static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU, } static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { + const Triple &TT) { MCAsmInfo *MAI = new XCoreMCAsmInfo(TT); // Initial state of the frame pointer is SP. diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 23e24f2afd5a..702056d781d0 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -37,7 +37,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -100,7 +100,7 @@ void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) { Twine(Sym->getName() + StringRef(".globound"))); OutStreamer->EmitSymbolAttribute(SymGlob, MCSA_Global); OutStreamer->EmitAssignment(SymGlob, - MCConstantExpr::Create(ATy->getNumElements(), + MCConstantExpr::create(ATy->getNumElements(), OutContext)); if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()) { @@ -157,7 +157,8 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { unsigned Size = TD->getTypeAllocSize(C->getType()); if (MAI->hasDotTypeDotSizeDirective()) { OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); - OutStreamer->EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); + OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym), + MCConstantExpr::create(Size, OutContext)); } OutStreamer->EmitLabel(GVSym); @@ -201,7 +202,7 @@ printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O, MachineBasicBlock *MBB = JTBBs[i]; if (i > 0) O << ","; - O << *MBB->getSymbol(); + MBB->getSymbol()->print(O, MAI); } } @@ -217,17 +218,17 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, O << MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); + MO.getMBB()->getSymbol()->print(O, MAI); break; case MachineOperand::MO_GlobalAddress: - O << *getSymbol(MO.getGlobal()); + getSymbol(MO.getGlobal())->print(O, MAI); break; case MachineOperand::MO_ConstantPoolIndex: O << 
DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getIndex(); break; case MachineOperand::MO_BlockAddress: - O << *GetBlockAddressSymbol(MO.getBlockAddress()); + GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI); break; default: llvm_unreachable("not implemented"); diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index f56caec24d63..aa71241102ff 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -1926,7 +1926,8 @@ static inline bool isImmUs4(int64_t val) /// by AM is legal for this target, for a load/store of the specified type. bool XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty, + unsigned AS) const { if (Ty->getTypeID() == Type::VoidTyID) return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 22014ed4bac6..97f0494b6fe3 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -120,7 +120,8 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override; - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + unsigned AS) const override; private: const TargetMachine &TM; diff --git a/lib/Target/XCore/XCoreMCInstLower.cpp b/lib/Target/XCore/XCoreMCInstLower.cpp index cffba5fee03f..03c5fa2e9c42 100644 --- a/lib/Target/XCore/XCoreMCInstLower.cpp +++ b/lib/Target/XCore/XCoreMCInstLower.cpp @@ -65,7 +65,7 @@ MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO, llvm_unreachable("<unknown operand type>"); } - const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx); + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, Kind, *Ctx); if (!Offset) return MCOperand::createExpr(MCSym); @@ -73,8 +73,8 @@ MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO, // Assume offset is never negative. assert(Offset > 0); - const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx); - const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); + const MCConstantExpr *OffsetExpr = MCConstantExpr::create(Offset, *Ctx); + const MCBinaryExpr *Add = MCBinaryExpr::createAdd(MCSym, OffsetExpr, *Ctx); return MCOperand::createExpr(Add); } diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 7b7672d0edfe..c7c57ab56444 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -553,7 +553,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, LoadInst *Load = Loads[i]; BasicBlock *BB = Load->getParent(); - AliasAnalysis::Location Loc = AA.getLocation(Load); + AliasAnalysis::Location Loc = MemoryLocation::get(Load); if (AA.canInstructionRangeModRef(BB->front(), *Load, Loc, AliasAnalysis::Mod)) return false; // Pointer is invalidated! @@ -774,7 +774,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create( - STy, *AI, Idxs, (*AI)->getName() + "." + utostr(i), Call); + STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call); // TODO: Tell AA about the new values? 
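      // (Sketch of the expansion: for a promoted struct argument %agg, each
      // call site gains one GEP/load pair per field, e.g. for field 0:
      //   %agg.0 = getelementptr %struct, %struct* %agg, i32 0, i32 0
      //   %agg.0.val = load %agg.0
      // and the ".val" loads become the new call arguments.)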
Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 92e384a340a7..ef8f42ffd6d4 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -232,20 +232,20 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { // Ignore non-volatile loads from local memory. (Atomic is okay here.) if (!LI->isVolatile()) { - AliasAnalysis::Location Loc = AA->getLocation(LI); + AliasAnalysis::Location Loc = MemoryLocation::get(LI); if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; } } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { // Ignore non-volatile stores to local memory. (Atomic is okay here.) if (!SI->isVolatile()) { - AliasAnalysis::Location Loc = AA->getLocation(SI); + AliasAnalysis::Location Loc = MemoryLocation::get(SI); if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; } } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) { // Ignore vaargs on local memory. - AliasAnalysis::Location Loc = AA->getLocation(VI); + AliasAnalysis::Location Loc = MemoryLocation::get(VI); if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; } diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 91a5eefca17d..052f1b4b1325 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -389,11 +389,21 @@ private: }; class FunctionNode { - AssertingVH<Function> F; + mutable AssertingVH<Function> F; public: FunctionNode(Function *F) : F(F) {} Function *getFunc() const { return F; } + + /// Replace the reference to the function F by the function G, assuming their + /// implementations are equal. + void replaceBy(Function *G) const { + assert(!(*this < FunctionNode(G)) && !(FunctionNode(G) < *this) && + "The two functions must be equal"); + + F = G; + } + void release() { F = 0; } bool operator<(const FunctionNode &RHS) const { return (FunctionComparator(F, RHS.getFunc()).compare()) == -1; @@ -1122,6 +1132,9 @@ private: /// Replace G with an alias to F. Deletes G. void writeAlias(Function *F, Function *G); + /// Replace function F with function G in the function tree. + void replaceFunctionInTree(FnTreeType::iterator &IterToF, Function *G); + /// The set of all distinct functions. Use the insert() and remove() methods /// to modify it. FnTreeType FnTree; @@ -1414,6 +1427,21 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { ++NumFunctionsMerged; } +/// Replace function F for function G in the map. +void MergeFunctions::replaceFunctionInTree(FnTreeType::iterator &IterToF, + Function *G) { + Function *F = IterToF->getFunc(); + + // A total order is already guaranteed otherwise because we process strong + // functions before weak functions. + assert(((F->mayBeOverridden() && G->mayBeOverridden()) || + (!F->mayBeOverridden() && !G->mayBeOverridden())) && + "Only change functions if both are strong or both are weak"); + (void)F; + + IterToF->replaceBy(G); +} + // Insert a ComparableFunction into the FnTree, or merge it away if equal to one // that was already inserted. bool MergeFunctions::insert(Function *NewFunction) { @@ -1439,6 +1467,22 @@ bool MergeFunctions::insert(Function *NewFunction) { } } + // Impose a total order (by name) on the replacement of functions. 
This is + // important when operating on more than one module independently to prevent + // cycles of thunks calling each other when the modules are linked together. + // + // When one function is weak and the other is strong there is an order imposed + // already. We process strong functions before weak functions. + if ((OldF.getFunc()->mayBeOverridden() && NewFunction->mayBeOverridden()) || + (!OldF.getFunc()->mayBeOverridden() && !NewFunction->mayBeOverridden())) + if (OldF.getFunc()->getName() > NewFunction->getName()) { + // Swap the two functions. + Function *F = OldF.getFunc(); + replaceFunctionInTree(Result.first, NewFunction); + NewFunction = F; + assert(OldF.getFunc() != F && "Must have swapped the functions."); + } + // Never thunk a strong function to a weak function. assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden()); @@ -1465,7 +1509,7 @@ void MergeFunctions::remove(Function *F) { if (Erased) { DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n"); - Deferred.push_back(F); + Deferred.emplace_back(F); } } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 2dafa58d305e..f53eeef1dae6 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2149,6 +2149,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, if (WillNotOverflowSignedAdd(LHS, RHS, OrigI)) return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(), true); + break; } case OCF_UNSIGNED_SUB: @@ -2194,6 +2195,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, if (WillNotOverflowSignedMul(LHS, RHS, OrigI)) return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(), true); + break; } return false; diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 5aa59c69f39b..e7a45330d955 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -483,12 +483,17 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { } // Fold away bit casts of the loaded value by loading the desired type. + // We can do this for BitCastInsts as well as casts from and to pointer types, + // as long as those are noops (i.e., the source or dest type have the same + // bitwidth as the target's pointers). 
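  // (Example of the widened fold, assuming 32-bit pointers: a "load i8*"
  // whose only use is "ptrtoint i8* %v to i32" becomes a direct "load i32",
  // because ptrtoint is a no-op cast at that width; plain bitcasts keep
  // folding exactly as before.)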
if (LI.hasOneUse()) - if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) { - LoadInst *NewLoad = combineLoadToNewType(IC, LI, BC->getDestTy()); - BC->replaceAllUsesWith(NewLoad); - IC.EraseInstFromFunction(*BC); - return &LI; + if (auto* CI = dyn_cast<CastInst>(LI.user_back())) { + if (CI->isNoopCast(DL)) { + LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy()); + CI->replaceAllUsesWith(NewLoad); + IC.EraseInstFromFunction(*CI); + return &LI; + } } // FIXME: We should also canonicalize loads of vectors when their elements are diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index d2fbcdd39915..f51442a9f36d 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -276,72 +276,6 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, return nullptr; } -/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is -/// replaced with RepOp. -static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const TargetLibraryInfo *TLI, - const DataLayout &DL, DominatorTree *DT, - AssumptionCache *AC) { - // Trivial replacement. - if (V == Op) - return RepOp; - - Instruction *I = dyn_cast<Instruction>(V); - if (!I) - return nullptr; - - // If this is a binary operator, try to simplify it with the replaced op. - if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) { - if (B->getOperand(0) == Op) - return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), DL, TLI); - if (B->getOperand(1) == Op) - return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, DL, TLI); - } - - // Same for CmpInsts. - if (CmpInst *C = dyn_cast<CmpInst>(I)) { - if (C->getOperand(0) == Op) - return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), DL, - TLI, DT, AC); - if (C->getOperand(1) == Op) - return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, DL, - TLI, DT, AC); - } - - // TODO: We could hand off more cases to instsimplify here. - - // If all operands are constant after substituting Op for RepOp then we can - // constant fold the instruction. - if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) { - // Build a list of all constant operands. - SmallVector<Constant*, 8> ConstOps; - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - if (I->getOperand(i) == Op) - ConstOps.push_back(CRepOp); - else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i))) - ConstOps.push_back(COp); - else - break; - } - - // All operands were constants, fold it. - if (ConstOps.size() == I->getNumOperands()) { - if (CmpInst *C = dyn_cast<CmpInst>(I)) - return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0], - ConstOps[1], DL, TLI); - - if (LoadInst *LI = dyn_cast<LoadInst>(I)) - if (!LI->isVolatile()) - return ConstantFoldLoadFromConstPtr(ConstOps[0], DL); - - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps, - DL, TLI); - } - } - - return nullptr; -} - /// foldSelectICmpAndOr - We want to turn: /// (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) /// into: @@ -477,14 +411,6 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, // here, so make sure the select is the only user. if (ICI->hasOneUse()) if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) { - // X < MIN ? T : F --> F - if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT) - && CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) - return ReplaceInstUsesWith(SI, FalseVal); - // X > MAX ? 
T : F --> F - else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT) - && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) - return ReplaceInstUsesWith(SI, FalseVal); switch (Pred) { default: break; case ICmpInst::ICMP_ULT: @@ -598,33 +524,6 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, } } - // If we have an equality comparison then we know the value in one of the - // arms of the select. See if substituting this value into the arm and - // simplifying the result yields the same value as the other arm. - if (Pred == ICmpInst::ICMP_EQ) { - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) == - TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) == - TrueVal) - return ReplaceInstUsesWith(SI, FalseVal); - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) == - FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) == - FalseVal) - return ReplaceInstUsesWith(SI, FalseVal); - } else if (Pred == ICmpInst::ICMP_NE) { - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) == - FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) == - FalseVal) - return ReplaceInstUsesWith(SI, TrueVal); - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) == - TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) == - TrueVal) - return ReplaceInstUsesWith(SI, TrueVal); - } - // NOTE: if we wanted to, this is where to detect integer MIN/MAX if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS)) { @@ -639,7 +538,8 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, } } - if (unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits()) { + { + unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType()); APInt MinSignedValue = APInt::getSignBit(BitWidth); Value *X; const APInt *Y, *C; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index be49cd1c436b..9d602c6a9e22 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1843,7 +1843,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, case Instruction::BitCast: case Instruction::GetElementPtr: - Users.push_back(I); + Users.emplace_back(I); Worklist.push_back(I); continue; @@ -1852,7 +1852,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, // We can fold eq/ne comparisons with null to false/true, respectively. 
if (!ICI->isEquality() || !isa<ConstantPointerNull>(ICI->getOperand(1))) return false; - Users.push_back(I); + Users.emplace_back(I); continue; } @@ -1878,13 +1878,13 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::objectsize: - Users.push_back(I); + Users.emplace_back(I); continue; } } if (isFreeCall(I, TLI)) { - Users.push_back(I); + Users.emplace_back(I); continue; } return false; @@ -1893,7 +1893,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, StoreInst *SI = cast<StoreInst>(I); if (SI->isVolatile() || SI->getPointerOperand() != PI) return false; - Users.push_back(I); + Users.emplace_back(I); continue; } } diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 939e04bb2a51..25f78b0b2a26 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -106,14 +106,15 @@ static const char *const kAsanUnpoisonStackMemoryName = static const char *const kAsanOptionDetectUAR = "__asan_option_detect_stack_use_after_return"; +static const char *const kAsanAllocaPoison = + "__asan_alloca_poison"; +static const char *const kAsanAllocasUnpoison = + "__asan_allocas_unpoison"; + // Accesses sizes are powers of two: 1, 2, 4, 8, 16. static const size_t kNumberOfAccessSizes = 5; static const unsigned kAllocaRzSize = 32; -static const unsigned kAsanAllocaLeftMagic = 0xcacacacaU; -static const unsigned kAsanAllocaRightMagic = 0xcbcbcbcbU; -static const unsigned kAsanAllocaPartialVal1 = 0xcbcbcb00U; -static const unsigned kAsanAllocaPartialVal2 = 0x000000cbU; // Command-line flags. @@ -230,8 +231,6 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"), STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); -STATISTIC(NumInstrumentedDynamicAllocas, - "Number of instrumented dynamic allocas"); STATISTIC(NumOptimizedAccessesToGlobalVar, "Number of optimized accesses to global vars"); STATISTIC(NumOptimizedAccessesToStackVar, @@ -402,6 +401,12 @@ struct AddressSanitizer : public FunctionPass { } /// Check if we want (and can) handle this alloca. bool isInterestingAlloca(AllocaInst &AI); + + // Check if we have dynamic alloca. + bool isDynamicAlloca(AllocaInst &AI) const { + return AI.isArrayAllocation() || !AI.isStaticAlloca(); + } + /// If it is an interesting memory access, return the PointerOperand /// and set IsWrite/Alignment. Otherwise return nullptr. Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, @@ -517,6 +522,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1], *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1]; Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc; + Function *AsanAllocaPoisonFunc, *AsanAllocasUnpoisonFunc; // Stores a place and arguments of poisoning/unpoisoning call for alloca. struct AllocaPoisonCall { @@ -527,23 +533,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { }; SmallVector<AllocaPoisonCall, 8> AllocaPoisonCallVec; - // Stores left and right redzone shadow addresses for dynamic alloca - // and pointer to alloca instruction itself. - // LeftRzAddr is a shadow address for alloca left redzone. - // RightRzAddr is a shadow address for alloca right redzone. 
- struct DynamicAllocaCall { - AllocaInst *AI; - Value *LeftRzAddr; - Value *RightRzAddr; - bool Poison; - explicit DynamicAllocaCall(AllocaInst *AI, Value *LeftRzAddr = nullptr, - Value *RightRzAddr = nullptr) - : AI(AI), - LeftRzAddr(LeftRzAddr), - RightRzAddr(RightRzAddr), - Poison(true) {} - }; - SmallVector<DynamicAllocaCall, 1> DynamicAllocaVec; + SmallVector<AllocaInst *, 1> DynamicAllocaVec; + SmallVector<IntrinsicInst *, 1> StackRestoreVec; + AllocaInst *DynamicAllocaLayout = nullptr; // Maps Value to an AllocaInst from which the Value is originated. typedef DenseMap<Value *, AllocaInst *> AllocaForValueMapTy; @@ -586,41 +578,29 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { // Then unpoison everything back before the function returns. void poisonStack(); + void createDynamicAllocasInitStorage(); + // ----------------------- Visitors. /// \brief Collect all Ret instructions. void visitReturnInst(ReturnInst &RI) { RetVec.push_back(&RI); } - // Unpoison dynamic allocas redzones. - void unpoisonDynamicAlloca(DynamicAllocaCall &AllocaCall) { - if (!AllocaCall.Poison) return; - for (auto Ret : RetVec) { - IRBuilder<> IRBRet(Ret); - PointerType *Int32PtrTy = PointerType::getUnqual(IRBRet.getInt32Ty()); - Value *Zero = Constant::getNullValue(IRBRet.getInt32Ty()); - Value *PartialRzAddr = IRBRet.CreateSub(AllocaCall.RightRzAddr, - ConstantInt::get(IntptrTy, 4)); - IRBRet.CreateStore( - Zero, IRBRet.CreateIntToPtr(AllocaCall.LeftRzAddr, Int32PtrTy)); - IRBRet.CreateStore(Zero, - IRBRet.CreateIntToPtr(PartialRzAddr, Int32PtrTy)); - IRBRet.CreateStore( - Zero, IRBRet.CreateIntToPtr(AllocaCall.RightRzAddr, Int32PtrTy)); - } + void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore, + Value *SavedStack) { + IRBuilder<> IRB(InstBefore); + IRB.CreateCall(AsanAllocasUnpoisonFunc, + {IRB.CreateLoad(DynamicAllocaLayout), + IRB.CreatePtrToInt(SavedStack, IntptrTy)}); } - // Right shift for BigEndian and left shift for LittleEndian. - Value *shiftAllocaMagic(Value *Val, IRBuilder<> &IRB, Value *Shift) { - auto &DL = F.getParent()->getDataLayout(); - return DL.isLittleEndian() ? IRB.CreateShl(Val, Shift) - : IRB.CreateLShr(Val, Shift); - } + // Unpoison dynamic allocas redzones. + void unpoisonDynamicAllocas() { + for (auto &Ret : RetVec) + unpoisonDynamicAllocasBeforeInst(Ret, DynamicAllocaLayout); - // Compute PartialRzMagic for dynamic alloca call. Since we don't know the - // size of requested memory until runtime, we should compute it dynamically. - // If PartialSize is 0, PartialRzMagic would contain kAsanAllocaRightMagic, - // otherwise it would contain the value that we will use to poison the - // partial redzone for alloca call. - Value *computePartialRzMagic(Value *PartialSize, IRBuilder<> &IRB); + for (auto &StackRestoreInst : StackRestoreVec) + unpoisonDynamicAllocasBeforeInst(StackRestoreInst, + StackRestoreInst->getOperand(0)); + } // Deploy and poison redzones around dynamic alloca call. To do this, we // should replace this call with another one with changed parameters and @@ -632,20 +612,15 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { // addr = tmp + 32 (first 32 bytes are for the left redzone). // Additional_size is added to make new memory allocation contain not only // requested memory, but also left, partial and right redzones. - // After that, we should poison redzones: - // (1) Left redzone with kAsanAllocaLeftMagic. 
- // (2) Partial redzone with the value, computed in runtime by - // computePartialRzMagic function. - // (3) Right redzone with kAsanAllocaRightMagic. - void handleDynamicAllocaCall(DynamicAllocaCall &AllocaCall); + void handleDynamicAllocaCall(AllocaInst *AI); /// \brief Collect Alloca instructions we want (and can) handle. void visitAllocaInst(AllocaInst &AI) { if (!ASan.isInterestingAlloca(AI)) return; StackAlignment = std::max(StackAlignment, AI.getAlignment()); - if (isDynamicAlloca(AI)) - DynamicAllocaVec.push_back(DynamicAllocaCall(&AI)); + if (ASan.isDynamicAlloca(AI)) + DynamicAllocaVec.push_back(&AI); else AllocaVec.push_back(&AI); } @@ -653,8 +628,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { /// \brief Collect lifetime intrinsic calls to check for use-after-scope /// errors. void visitIntrinsicInst(IntrinsicInst &II) { - if (!ClCheckLifetime) return; Intrinsic::ID ID = II.getIntrinsicID(); + if (ID == Intrinsic::stackrestore) StackRestoreVec.push_back(&II); + if (!ClCheckLifetime) return; if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end) return; // Found lifetime intrinsic, add ASan instrumentation if necessary. @@ -690,9 +666,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { return true; } - bool isDynamicAlloca(AllocaInst &AI) const { - return AI.isArrayAllocation() || !AI.isStaticAlloca(); - } /// Finds alloca where the value comes from. AllocaInst *findAllocaForValue(Value *V); void poisonRedZones(ArrayRef<uint8_t> ShadowBytes, IRBuilder<> &IRB, @@ -811,12 +784,14 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) { if (PreviouslySeenAllocaInfo != ProcessedAllocas.end()) return PreviouslySeenAllocaInfo->getSecond(); - bool IsInteresting = (AI.getAllocatedType()->isSized() && - // alloca() may be called with 0 size, ignore it. - getAllocaSizeInBytes(&AI) > 0 && - // We are only interested in allocas not promotable to registers. - // Promotable allocas are common under -O0. - (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI))); + bool IsInteresting = + (AI.getAllocatedType()->isSized() && + // alloca() may be called with 0 size, ignore it. + getAllocaSizeInBytes(&AI) > 0 && + // We are only interested in allocas not promotable to registers. + // Promotable allocas are common under -O0. + (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI) || + isDynamicAlloca(AI))); ProcessedAllocas[&AI] = IsInteresting; return IsInteresting; @@ -1158,6 +1133,18 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { if (G->hasSection()) { StringRef Section(G->getSection()); + // Globals from llvm.metadata aren't emitted, do not instrument them. + if (Section == "llvm.metadata") return false; + + // Callbacks put into the CRT initializer/terminator sections + // should not be instrumented. 
+ // See https://code.google.com/p/address-sanitizer/issues/detail?id=305 + // and http://msdn.microsoft.com/en-US/en-en/library/bb918180(v=vs.120).aspx + if (Section.startswith(".CRT")) { + DEBUG(dbgs() << "Ignoring a global initializer callback: " << *G << "\n"); + return false; + } + if (TargetTriple.isOSBinFormatMachO()) { StringRef ParsedSegment, ParsedSection; unsigned TAA = 0, StubSize = 0; @@ -1165,8 +1152,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier( Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) { - report_fatal_error("Invalid section specifier '" + ParsedSection + - "': " + ErrorCode + "."); + assert(false && "Invalid section specifier."); + return false; } // Ignore the globals from the __OBJC section. The ObjC runtime assumes @@ -1196,18 +1183,6 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { return false; } } - - // Callbacks put into the CRT initializer/terminator sections - // should not be instrumented. - // See https://code.google.com/p/address-sanitizer/issues/detail?id=305 - // and http://msdn.microsoft.com/en-US/en-en/library/bb918180(v=vs.120).aspx - if (Section.startswith(".CRT")) { - DEBUG(dbgs() << "Ignoring a global initializer callback: " << *G << "\n"); - return false; - } - - // Globals from llvm.metadata aren't emitted, do not instrument them. - if (Section == "llvm.metadata") return false; } return true; @@ -1617,6 +1592,11 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); + AsanAllocaPoisonFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); + AsanAllocasUnpoisonFunc = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); } void FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes, @@ -1712,15 +1692,24 @@ Value *FunctionStackPoisoner::createAllocaForLayout( return IRB.CreatePointerCast(Alloca, IntptrTy); } +void FunctionStackPoisoner::createDynamicAllocasInitStorage() { + BasicBlock &FirstBB = *F.begin(); + IRBuilder<> IRB(dyn_cast<Instruction>(FirstBB.begin())); + DynamicAllocaLayout = IRB.CreateAlloca(IntptrTy, nullptr); + IRB.CreateStore(Constant::getNullValue(IntptrTy), DynamicAllocaLayout); + DynamicAllocaLayout->setAlignment(32); +} + void FunctionStackPoisoner::poisonStack() { assert(AllocaVec.size() > 0 || DynamicAllocaVec.size() > 0); - if (ClInstrumentAllocas) { + if (ClInstrumentAllocas && DynamicAllocaVec.size() > 0) { // Handle dynamic allocas. - for (auto &AllocaCall : DynamicAllocaVec) { - handleDynamicAllocaCall(AllocaCall); - unpoisonDynamicAlloca(AllocaCall); - } + createDynamicAllocasInitStorage(); + for (auto &AI : DynamicAllocaVec) + handleDynamicAllocaCall(AI); + + unpoisonDynamicAllocas(); } if (AllocaVec.size() == 0) return; @@ -1955,78 +1944,25 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) { return Res; } -// Compute PartialRzMagic for dynamic alloca call. PartialRzMagic is -// constructed from two separate 32-bit numbers: PartialRzMagic = Val1 | Val2. 
-// (1) Val1 is resposible for forming base value for PartialRzMagic, containing -// only 00 for fully addressable and 0xcb for fully poisoned bytes for each -// 8-byte chunk of user memory respectively. -// (2) Val2 forms the value for marking first poisoned byte in shadow memory -// with appropriate value (0x01 - 0x07 or 0xcb if Padding % 8 == 0). - -// Shift = Padding & ~7; // the number of bits we need to shift to access first -// chunk in shadow memory, containing nonzero bytes. -// Example: -// Padding = 21 Padding = 16 -// Shadow: |00|00|05|cb| Shadow: |00|00|cb|cb| -// ^ ^ -// | | -// Shift = 21 & ~7 = 16 Shift = 16 & ~7 = 16 -// -// Val1 = 0xcbcbcbcb << Shift; -// PartialBits = Padding ? Padding & 7 : 0xcb; -// Val2 = PartialBits << Shift; -// Result = Val1 | Val2; -Value *FunctionStackPoisoner::computePartialRzMagic(Value *PartialSize, - IRBuilder<> &IRB) { - PartialSize = IRB.CreateIntCast(PartialSize, IRB.getInt32Ty(), false); - Value *Shift = IRB.CreateAnd(PartialSize, IRB.getInt32(~7)); - unsigned Val1Int = kAsanAllocaPartialVal1; - unsigned Val2Int = kAsanAllocaPartialVal2; - if (!F.getParent()->getDataLayout().isLittleEndian()) { - Val1Int = sys::getSwappedBytes(Val1Int); - Val2Int = sys::getSwappedBytes(Val2Int); - } - Value *Val1 = shiftAllocaMagic(IRB.getInt32(Val1Int), IRB, Shift); - Value *PartialBits = IRB.CreateAnd(PartialSize, IRB.getInt32(7)); - // For BigEndian get 0x000000YZ -> 0xYZ000000. - if (F.getParent()->getDataLayout().isBigEndian()) - PartialBits = IRB.CreateShl(PartialBits, IRB.getInt32(24)); - Value *Val2 = IRB.getInt32(Val2Int); - Value *Cond = - IRB.CreateICmpNE(PartialBits, Constant::getNullValue(IRB.getInt32Ty())); - Val2 = IRB.CreateSelect(Cond, shiftAllocaMagic(PartialBits, IRB, Shift), - shiftAllocaMagic(Val2, IRB, Shift)); - return IRB.CreateOr(Val1, Val2); -} - -void FunctionStackPoisoner::handleDynamicAllocaCall( - DynamicAllocaCall &AllocaCall) { - AllocaInst *AI = AllocaCall.AI; - if (!doesDominateAllExits(AI)) { - // We do not yet handle complex allocas - AllocaCall.Poison = false; - return; - } - +void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) { IRBuilder<> IRB(AI); - PointerType *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); const unsigned Align = std::max(kAllocaRzSize, AI->getAlignment()); const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1; Value *Zero = Constant::getNullValue(IntptrTy); Value *AllocaRzSize = ConstantInt::get(IntptrTy, kAllocaRzSize); Value *AllocaRzMask = ConstantInt::get(IntptrTy, AllocaRedzoneMask); - Value *NotAllocaRzMask = ConstantInt::get(IntptrTy, ~AllocaRedzoneMask); // Since we need to extend alloca with additional memory to locate // redzones, and OldSize is number of allocated blocks with // ElementSize size, get allocated memory size in bytes by // OldSize * ElementSize. 
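// A standalone sketch of the size arithmetic behind the rewritten dynamic
// alloca (hypothetical helper, not the patch's code): reserve a left
// redzone of Align bytes, then the user bytes, then padding up to the next
// 32-byte boundary (the partial redzone), then one full right redzone; the
// runtime call __asan_alloca_poison(NewAddress, OldSize) poisons everything
// around the user region, and __asan_allocas_unpoison reverses it before
// every ret and llvm.stackrestore.
#include <algorithm>
#include <cstdint>

static uint64_t instrumentedAllocaSizeModel(uint64_t OldSize,
                                            uint64_t Alignment) {
  const uint64_t RzSize = 32;                         // kAllocaRzSize
  const uint64_t Align = std::max(RzSize, Alignment); // left redzone bytes
  const uint64_t Partial = OldSize & (RzSize - 1);    // OldSize % 32
  const uint64_t PartialPad = Partial ? RzSize - Partial : 0;
  return Align + OldSize + PartialPad + RzSize; // left + user + pad + right
}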
- unsigned ElementSize = + const unsigned ElementSize = F.getParent()->getDataLayout().getTypeAllocSize(AI->getAllocatedType()); - Value *OldSize = IRB.CreateMul(AI->getArraySize(), - ConstantInt::get(IntptrTy, ElementSize)); + Value *OldSize = + IRB.CreateMul(IRB.CreateIntCast(AI->getArraySize(), IntptrTy, false), + ConstantInt::get(IntptrTy, ElementSize)); // PartialSize = OldSize % 32 Value *PartialSize = IRB.CreateAnd(OldSize, AllocaRzMask); @@ -2054,43 +1990,20 @@ void FunctionStackPoisoner::handleDynamicAllocaCall( Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy), ConstantInt::get(IntptrTy, Align)); - Value *NewAddressPtr = IRB.CreateIntToPtr(NewAddress, AI->getType()); - - // LeftRzAddress = NewAddress - kAllocaRzSize - Value *LeftRzAddress = IRB.CreateSub(NewAddress, AllocaRzSize); - - // Poisoning left redzone. - AllocaCall.LeftRzAddr = ASan.memToShadow(LeftRzAddress, IRB); - IRB.CreateStore(ConstantInt::get(IRB.getInt32Ty(), kAsanAllocaLeftMagic), - IRB.CreateIntToPtr(AllocaCall.LeftRzAddr, Int32PtrTy)); + // Insert __asan_alloca_poison call for new created alloca. + IRB.CreateCall(AsanAllocaPoisonFunc, {NewAddress, OldSize}); - // PartialRzAligned = PartialRzAddr & ~AllocaRzMask - Value *PartialRzAddr = IRB.CreateAdd(NewAddress, OldSize); - Value *PartialRzAligned = IRB.CreateAnd(PartialRzAddr, NotAllocaRzMask); + // Store the last alloca's address to DynamicAllocaLayout. We'll need this + // for unpoisoning stuff. + IRB.CreateStore(IRB.CreatePtrToInt(NewAlloca, IntptrTy), DynamicAllocaLayout); - // Poisoning partial redzone. - Value *PartialRzMagic = computePartialRzMagic(PartialSize, IRB); - Value *PartialRzShadowAddr = ASan.memToShadow(PartialRzAligned, IRB); - IRB.CreateStore(PartialRzMagic, - IRB.CreateIntToPtr(PartialRzShadowAddr, Int32PtrTy)); - - // RightRzAddress - // = (PartialRzAddr + AllocaRzMask) & ~AllocaRzMask - Value *RightRzAddress = IRB.CreateAnd( - IRB.CreateAdd(PartialRzAddr, AllocaRzMask), NotAllocaRzMask); - - // Poisoning right redzone. - AllocaCall.RightRzAddr = ASan.memToShadow(RightRzAddress, IRB); - IRB.CreateStore(ConstantInt::get(IRB.getInt32Ty(), kAsanAllocaRightMagic), - IRB.CreateIntToPtr(AllocaCall.RightRzAddr, Int32PtrTy)); + Value *NewAddressPtr = IRB.CreateIntToPtr(NewAddress, AI->getType()); - // Replace all uses of AddessReturnedByAlloca with NewAddress. + // Replace all uses of AddessReturnedByAlloca with NewAddressPtr. AI->replaceAllUsesWith(NewAddressPtr); - // We are done. Erase old alloca and store left, partial and right redzones - // shadow addresses for future unpoisoning. + // We are done. Erase old alloca from parent. 
AI->eraseFromParent(); - NumInstrumentedDynamicAllocas++; } // isSafeAccess returns true if Addr is always inbounds with respect to its diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 610ff52ba9db..05a9c8a5dfe5 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -146,8 +146,8 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { IRBuilder<> Builder(Inc->getParent(), *Inc); uint64_t Index = Inc->getIndex()->getZExtValue(); - llvm::Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index); - llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount"); + Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index); + Value *Count = Builder.CreateLoad(Addr, "pgocount"); Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr)); Inc->eraseFromParent(); @@ -196,14 +196,17 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { if (It != RegionCounters.end()) return It->second; - // Move the name variable to the right section. + // Move the name variable to the right section. Make sure it is placed in the + // same comdat as its associated function. Otherwise, we may get multiple + // counters for the same function in certain cases. + Function *Fn = Inc->getParent()->getParent(); Name->setSection(getNameSection()); Name->setAlignment(1); + Name->setComdat(Fn->getComdat()); uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); LLVMContext &Ctx = M->getContext(); ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); - Function *Fn = Inc->getParent()->getParent(); // Create the counters variable. auto *Counters = new GlobalVariable(*M, CounterTy, false, Name->getLinkage(), @@ -212,9 +215,6 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { Counters->setVisibility(Name->getVisibility()); Counters->setSection(getCountersSection()); Counters->setAlignment(8); - // Place the counters in the same comdat section as its parent function. - // Otherwise, we may get multiple counters for the same function in certain - // cases. Counters->setComdat(Fn->getComdat()); RegionCounters[Inc->getName()] = Counters; @@ -263,7 +263,7 @@ void InstrProfiling::emitRegistration() { if (Options.NoRedZone) RegisterF->addFnAttr(Attribute::NoRedZone); - auto *RuntimeRegisterTy = llvm::FunctionType::get(VoidTy, VoidPtrTy, false); + auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false); auto *RuntimeRegisterF = Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage, "__llvm_profile_register_function", M); @@ -310,7 +310,7 @@ void InstrProfiling::emitUses() { return; GlobalVariable *LLVMUsed = M->getGlobalVariable("llvm.used"); - std::vector<Constant*> MergedVars; + std::vector<Constant *> MergedVars; if (LLVMUsed) { // Collect the existing members of llvm.used. ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); @@ -323,13 +323,13 @@ void InstrProfiling::emitUses() { // Add uses for our data. for (auto *Value : UsedVars) MergedVars.push_back( - ConstantExpr::getBitCast(cast<llvm::Constant>(Value), i8PTy)); + ConstantExpr::getBitCast(cast<Constant>(Value), i8PTy)); // Recreate llvm.used. 
ArrayType *ATy = ArrayType::get(i8PTy, MergedVars.size()); - LLVMUsed = new llvm::GlobalVariable( - *M, ATy, false, llvm::GlobalValue::AppendingLinkage, - llvm::ConstantArray::get(ATy, MergedVars), "llvm.used"); + LLVMUsed = + new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage, + ConstantArray::get(ATy, MergedVars), "llvm.used"); LLVMUsed->setSection("llvm.metadata"); } diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 2a3139f8705d..e7731ad5cd17 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -200,7 +200,7 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load, bool SawRelease = false; // Get the location associated with Load. - AliasAnalysis::Location Loc = AA->getLocation(Load); + AliasAnalysis::Location Loc = MemoryLocation::get(Load); // Walk down to find the store and the release, which may be in either order. for (auto I = std::next(BasicBlock::iterator(Load)), diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index d1302c6e22f4..79624b2e4c47 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -113,10 +113,11 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) { Value *Condition = SI->getCondition(); if (!Condition->getType()->isVectorTy()) { - if (Constant *C = LVI->getConstantOnEdge(Condition, P->getIncomingBlock(i), BB, P)) { - if (C == ConstantInt::getTrue(Condition->getType())) { + if (Constant *C = LVI->getConstantOnEdge( + Condition, P->getIncomingBlock(i), BB, P)) { + if (C->isOneValue()) { V = SI->getTrueValue(); - } else { + } else if (C->isZeroValue()) { V = SI->getFalseValue(); } // Once LVI learns to handle vector types, we could also add support diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 01952cf6e8b3..eb48a766a2cf 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -197,11 +197,11 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { static AliasAnalysis::Location getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) - return AA.getLocation(SI); + return MemoryLocation::get(SI); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) { // memcpy/memmove/memset. - AliasAnalysis::Location Loc = AA.getLocationForDest(MI); + AliasAnalysis::Location Loc = MemoryLocation::getForDest(MI); return Loc; } @@ -231,7 +231,7 @@ getLocForRead(Instruction *Inst, AliasAnalysis &AA) { // The only instructions that both read and write are the mem transfer // instructions (memcpy/memmove). 
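// A standalone sketch of the API shift running through these hunks:
// location construction moves from AliasAnalysis member helpers to static
// MemoryLocation factories (MemoryLocation::get / getForSource /
// getForDest). Modeled here with simplified, hypothetical types:
#include <cstdint>

struct MemoryLocationModel {
  const void *Ptr; // base of the accessed memory
  uint64_t Size;   // access size in bytes; ~0ULL stands in for UnknownSize
};

// In the style of MemoryLocation::getForSource: a mem-transfer reads
// Length bytes starting at its raw source pointer.
static MemoryLocationModel getForSourceModel(const void *RawSource,
                                             uint64_t Length) {
  return MemoryLocationModel{RawSource, Length};
}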
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst)) - return AA.getLocationForSource(MTI); + return MemoryLocation::getForSource(MTI); return AliasAnalysis::Location(); } @@ -815,11 +815,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (LoadInst *L = dyn_cast<LoadInst>(BBI)) { if (!L->isUnordered()) // Be conservative with atomic/volatile load break; - LoadedLoc = AA->getLocation(L); + LoadedLoc = MemoryLocation::get(L); } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { - LoadedLoc = AA->getLocation(V); + LoadedLoc = MemoryLocation::get(V); } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) { - LoadedLoc = AA->getLocationForSource(MTI); + LoadedLoc = MemoryLocation::getForSource(MTI); } else if (!BBI->mayReadFromMemory()) { // Instruction doesn't read memory. Note that stores that weren't removed // above will hit this case. diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 600589c904c4..359a616c069d 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -68,6 +68,22 @@ static cl::opt<bool> VerifyIndvars( static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden, cl::desc("Reduce live induction variables.")); +enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl }; + +static cl::opt<ReplaceExitVal> ReplaceExitValue( + "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), + cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), + cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), + clEnumValN(OnlyCheapRepl, "cheap", + "only replace exit value when the cost is cheap"), + clEnumValN(AlwaysRepl, "always", + "always replace exit value whenever possible"), + clEnumValEnd)); + +namespace { +struct RewritePhi; +} + namespace { class IndVarSimplify : public LoopPass { LoopInfo *LI; @@ -112,6 +128,7 @@ namespace { void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM); + bool CanLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet); void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, @@ -464,6 +481,21 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { SE->forgetLoop(L); } +namespace { +// Collect information about PHI nodes which can be transformed in +// RewriteLoopExitValues. +struct RewritePhi { + PHINode *PN; + unsigned Ith; // Ith incoming value. + Value *Val; // Exit value after expansion. + bool HighCost; // High Cost when expansion. + bool SafePhi; // LCSSASafePhiForRAUW. + + RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S) + : PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {} +}; +} + //===----------------------------------------------------------------------===// // RewriteLoopExitValues - Optimize IV users outside the loop. // As a side effect, reduces the amount of IV processing within the loop. @@ -486,6 +518,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); + SmallVector<RewritePhi, 8> RewritePhiSet; // Find all values that are computed inside the loop, but used outside of it. // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan // the exit blocks of the loop to find them. 
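A minimal standalone sketch of the exit-value replacement policy these IndVarSimplify hunks introduce (hypothetical helper, not the patch's code): candidates are first collected into RewritePhiSet, and a candidate is rewritten only when its expansion is cheap, unless the whole loop turns out to be deletable, in which case rewriting proceeds regardless of cost.

enum ReplaceExitValModel { NeverReplModel, OnlyCheapReplModel, AlwaysReplModel };

static bool shouldRewriteExitValue(ReplaceExitValModel Strategy, bool HighCost,
                                   bool LoopCanBeDeleted) {
  switch (Strategy) {
  case NeverReplModel:
    return false; // RewriteLoopExitValues is not even invoked
  case OnlyCheapReplModel:
    return !HighCost || LoopCanBeDeleted; // cheap, or the loop dies anyway
  case AlwaysReplModel:
    return true;
  }
  return false;
}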
@@ -604,23 +637,44 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { DeadInsts.push_back(ExitVal); continue; } - Changed = true; - ++NumReplaced; + bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L); - PN->setIncomingValue(i, ExitVal); + // Collect all the candidate PHINodes to be rewritten. + RewritePhiSet.push_back( + RewritePhi(PN, i, ExitVal, HighCost, LCSSASafePhiForRAUW)); + } + } + } - // If this instruction is dead now, delete it. Don't do it now to avoid - // invalidating iterators. - if (isInstructionTriviallyDead(Inst, TLI)) - DeadInsts.push_back(Inst); + bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet); - // If we determined that this PHI is safe to replace even if an LCSSA - // PHI, do so. - if (LCSSASafePhiForRAUW) { - PN->replaceAllUsesWith(ExitVal); - PN->eraseFromParent(); - } - } + // Transformation. + for (const RewritePhi &Phi : RewritePhiSet) { + PHINode *PN = Phi.PN; + Value *ExitVal = Phi.Val; + + // Only do the rewrite when the ExitValue can be expanded cheaply. + // If LoopCanBeDel is true, rewrite exit value aggressively. + if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) { + DeadInsts.push_back(ExitVal); + continue; + } + + Changed = true; + ++NumReplaced; + Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith)); + PN->setIncomingValue(Phi.Ith, ExitVal); + + // If this instruction is dead now, delete it. Don't do it now to avoid + // invalidating iterators. + if (isInstructionTriviallyDead(Inst, TLI)) + DeadInsts.push_back(Inst); + + // If we determined that this PHI is safe to replace even if an LCSSA + // PHI, do so. + if (Phi.SafePhi) { + PN->replaceAllUsesWith(ExitVal); + PN->eraseFromParent(); } } @@ -629,6 +683,65 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { Rewriter.clearInsertPoint(); } +/// CanLoopBeDeleted - Check whether it is possible to delete the loop after +/// rewriting exit value. If it is possible, ignore ReplaceExitValue and +/// do rewriting aggressively. +bool IndVarSimplify::CanLoopBeDeleted( + Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) { + + BasicBlock *Preheader = L->getLoopPreheader(); + // If there is no preheader, the loop will not be deleted. + if (!Preheader) + return false; + + // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. + // We obviate multiple ExitingBlocks case for simplicity. + // TODO: If we see testcase with multiple ExitingBlocks can be deleted + // after exit value rewriting, we can enhance the logic here. + SmallVector<BasicBlock *, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + SmallVector<BasicBlock *, 8> ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1) + return false; + + BasicBlock *ExitBlock = ExitBlocks[0]; + BasicBlock::iterator BI = ExitBlock->begin(); + while (PHINode *P = dyn_cast<PHINode>(BI)) { + Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); + + // If the Incoming value of P is found in RewritePhiSet, we know it + // could be rewritten to use a loop invariant value in transformation + // phase later. Skip it in the loop invariant check below. 
+ bool found = false; + for (const RewritePhi &Phi : RewritePhiSet) { + unsigned i = Phi.Ith; + if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { + found = true; + break; + } + } + + Instruction *I; + if (!found && (I = dyn_cast<Instruction>(Incoming))) + if (!L->hasLoopInvariantOperands(I)) + return false; + + ++BI; + } + + for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); + LI != LE; ++LI) { + for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE; + ++BI) { + if (BI->mayHaveSideEffects()) + return false; + } + } + + return true; +} + //===----------------------------------------------------------------------===// // IV Widening - Extend the width of an IV to cover its widest uses. //===----------------------------------------------------------------------===// @@ -989,7 +1102,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt()); Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); UsePhi->replaceAllUsesWith(Trunc); - DeadInsts.push_back(UsePhi); + DeadInsts.emplace_back(UsePhi); DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " << *WidePhi << "\n"); } @@ -1022,7 +1135,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { << " replaced by " << *DU.WideDef << "\n"); ++NumElimExt; DU.NarrowUse->replaceAllUsesWith(NewDef); - DeadInsts.push_back(DU.NarrowUse); + DeadInsts.emplace_back(DU.NarrowUse); } // Now that the extend is gone, we want to expose it's uses for potential // further simplification. We don't need to directly inform SimplifyIVUsers @@ -1075,7 +1188,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { if (WideAddRec != SE->getSCEV(WideUse)) { DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n"); - DeadInsts.push_back(WideUse); + DeadInsts.emplace_back(WideUse); return nullptr; } @@ -1172,7 +1285,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // WidenIVUse may have removed the def-use edge. if (DU.NarrowDef->use_empty()) - DeadInsts.push_back(DU.NarrowDef); + DeadInsts.emplace_back(DU.NarrowDef); } return WidePhi; } @@ -1867,7 +1980,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // loop into any instructions outside of the loop that use the final values of // the current expressions. // - if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) + if (ReplaceExitValue != NeverRepl && + !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) RewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV cycles. 
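The CanLoopBeDeleted check above boils down to a handful of structural conditions. A standalone sketch under simplified, hypothetical types (not the patch's code):

struct LoopSummaryModel {
  bool HasPreheader;                        // required for deletion
  unsigned NumExitingBlocks, NumExitBlocks; // both must be exactly one
  bool ExitPhisRewrittenOrInvariant; // each exit PHI incoming value is either
                                     // in RewritePhiSet or loop-invariant
  bool BodyMayHaveSideEffects;       // any side effect blocks deletion
};

static bool canLoopBeDeletedModel(const LoopSummaryModel &L) {
  return L.HasPreheader && L.NumExitingBlocks == 1 && L.NumExitBlocks == 1 &&
         L.ExitPhisRewrittenOrInvariant && !L.BodyMayHaveSideEffects;
}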
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 584c7aee7f1d..4b59f3d2f6cc 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -811,7 +811,7 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { if (Instruction *U = dyn_cast<Instruction>(O)) { O = nullptr; if (U->use_empty()) - DeadInsts.push_back(U); + DeadInsts.emplace_back(U); } I->eraseFromParent(); @@ -2917,7 +2917,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain"); } Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper); - DeadInsts.push_back(Inc.IVOperand); + DeadInsts.emplace_back(Inc.IVOperand); } // If LSR created a new, wider phi, we may also replace its postinc. We only // do this if we also found a wide value for the head of the chain. @@ -2939,7 +2939,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain"); } Phi->replaceUsesOfWith(PostIncV, IVOper); - DeadInsts.push_back(PostIncV); + DeadInsts.emplace_back(PostIncV); } } } @@ -4594,7 +4594,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // form, update the ICmp's other operand. if (LU.Kind == LSRUse::ICmpZero) { ICmpInst *CI = cast<ICmpInst>(LF.UserInst); - DeadInsts.push_back(CI->getOperand(1)); + DeadInsts.emplace_back(CI->getOperand(1)); assert(!F.BaseGV && "ICmp does not support folding a global value and " "a scale at the same time!"); if (F.Scale == -1) { @@ -4737,7 +4737,7 @@ void LSRInstance::Rewrite(const LSRFixup &LF, LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV); } - DeadInsts.push_back(LF.OperandValToReplace); + DeadInsts.emplace_back(LF.OperandValToReplace); } /// ImplementSolution - Rewrite all the fixup locations with new values, diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index ccafd100ef0f..4ccbfc953e0c 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -38,25 +38,25 @@ using namespace llvm; #define DEBUG_TYPE "loop-unroll" static cl::opt<unsigned> -UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, - cl::desc("The cut-off point for automatic loop unrolling")); + UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, + cl::desc("The baseline cost threshold for loop unrolling")); + +static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold( + "unroll-percent-dynamic-cost-saved-threshold", cl::init(20), cl::Hidden, + cl::desc("The percentage of estimated dynamic cost which must be saved by " + "unrolling to allow unrolling up to the max threshold.")); + +static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount( + "unroll-dynamic-cost-savings-discount", cl::init(2000), cl::Hidden, + cl::desc("This is the amount discounted from the total unroll cost when " + "the unrolled form has a high dynamic cost savings (triggered by " + "the '-unroll-perecent-dynamic-cost-saved-threshold' flag).")); static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze( "unroll-max-iteration-count-to-analyze", cl::init(0), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability")); -static cl::opt<unsigned> UnrollMinPercentOfOptimized( - "unroll-percent-of-optimized-for-complete-unroll", cl::init(20), cl::Hidden, - 
cl::desc("If complete unrolling could trigger further optimizations, and, " - "by that, remove the given percent of instructions, perform the " - "complete unroll even if it's beyond the threshold")); - -static cl::opt<unsigned> UnrollAbsoluteThreshold( - "unroll-absolute-threshold", cl::init(2000), cl::Hidden, - cl::desc("Don't unroll if the unrolled size is bigger than this threshold," - " even if we can remove big portion of instructions later.")); - static cl::opt<unsigned> UnrollCount("unroll-count", cl::init(0), cl::Hidden, cl::desc("Use this unroll count for all loops including those with " @@ -82,16 +82,18 @@ namespace { static char ID; // Pass ID, replacement for typeid LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) { CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); - CurrentAbsoluteThreshold = UnrollAbsoluteThreshold; - CurrentMinPercentOfOptimized = UnrollMinPercentOfOptimized; + CurrentPercentDynamicCostSavedThreshold = + UnrollPercentDynamicCostSavedThreshold; + CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount; CurrentCount = (C == -1) ? UnrollCount : unsigned(C); CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R; UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); - UserAbsoluteThreshold = (UnrollAbsoluteThreshold.getNumOccurrences() > 0); - UserPercentOfOptimized = - (UnrollMinPercentOfOptimized.getNumOccurrences() > 0); + UserPercentDynamicCostSavedThreshold = + (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0); + UserDynamicCostSavingsDiscount = + (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0); UserAllowPartial = (P != -1) || (UnrollAllowPartial.getNumOccurrences() > 0); UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0); @@ -115,18 +117,18 @@ namespace { unsigned CurrentCount; unsigned CurrentThreshold; - unsigned CurrentAbsoluteThreshold; - unsigned CurrentMinPercentOfOptimized; - bool CurrentAllowPartial; - bool CurrentRuntime; - bool UserCount; // CurrentCount is user-specified. - bool UserThreshold; // CurrentThreshold is user-specified. - bool UserAbsoluteThreshold; // CurrentAbsoluteThreshold is - // user-specified. - bool UserPercentOfOptimized; // CurrentMinPercentOfOptimized is - // user-specified. - bool UserAllowPartial; // CurrentAllowPartial is user-specified. - bool UserRuntime; // CurrentRuntime is user-specified. + unsigned CurrentPercentDynamicCostSavedThreshold; + unsigned CurrentDynamicCostSavingsDiscount; + bool CurrentAllowPartial; + bool CurrentRuntime; + + // Flags for whether the 'current' settings are user-specified. 
+ bool UserCount; + bool UserThreshold; + bool UserPercentDynamicCostSavedThreshold; + bool UserDynamicCostSavingsDiscount; + bool UserAllowPartial; + bool UserRuntime; bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -156,8 +158,9 @@ namespace { void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI, TargetTransformInfo::UnrollingPreferences &UP) { UP.Threshold = CurrentThreshold; - UP.AbsoluteThreshold = CurrentAbsoluteThreshold; - UP.MinPercentOfOptimized = CurrentMinPercentOfOptimized; + UP.PercentDynamicCostSavedThreshold = + CurrentPercentDynamicCostSavedThreshold; + UP.DynamicCostSavingsDiscount = CurrentDynamicCostSavingsDiscount; UP.OptSizeThreshold = OptSizeUnrollThreshold; UP.PartialThreshold = CurrentThreshold; UP.PartialOptSizeThreshold = OptSizeUnrollThreshold; @@ -186,8 +189,8 @@ namespace { void selectThresholds(const Loop *L, bool HasPragma, const TargetTransformInfo::UnrollingPreferences &UP, unsigned &Threshold, unsigned &PartialThreshold, - unsigned &AbsoluteThreshold, - unsigned &PercentOfOptimizedForCompleteUnroll) { + unsigned &PercentDynamicCostSavedThreshold, + unsigned &DynamicCostSavingsDiscount) { // Determine the current unrolling threshold. While this is // normally set from UnrollThreshold, it is overridden to a // smaller value if the current function is marked as @@ -195,11 +198,13 @@ namespace { // specified. Threshold = UserThreshold ? CurrentThreshold : UP.Threshold; PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold; - AbsoluteThreshold = UserAbsoluteThreshold ? CurrentAbsoluteThreshold - : UP.AbsoluteThreshold; - PercentOfOptimizedForCompleteUnroll = UserPercentOfOptimized - ? CurrentMinPercentOfOptimized - : UP.MinPercentOfOptimized; + PercentDynamicCostSavedThreshold = + UserPercentDynamicCostSavedThreshold + ? CurrentPercentDynamicCostSavedThreshold + : UP.PercentDynamicCostSavedThreshold; + DynamicCostSavingsDiscount = UserDynamicCostSavingsDiscount + ? CurrentDynamicCostSavingsDiscount + : UP.DynamicCostSavingsDiscount; if (!UserThreshold && L->getHeader()->getParent()->hasFnAttribute( @@ -220,9 +225,9 @@ namespace { } } bool canUnrollCompletely(Loop *L, unsigned Threshold, - unsigned AbsoluteThreshold, uint64_t UnrolledSize, - unsigned NumberOfOptimizedInstructions, - unsigned PercentOfOptimizedForCompleteUnroll); + unsigned PercentDynamicCostSavedThreshold, + unsigned DynamicCostSavingsDiscount, + uint64_t UnrolledCost, uint64_t RolledDynamicCost); }; } @@ -246,187 +251,6 @@ Pass *llvm::createSimpleLoopUnrollPass() { } namespace { -/// \brief SCEV expressions visitor used for finding expressions that would -/// become constants if the loop L is unrolled. -struct FindConstantPointers { - /// \brief Shows whether the expression is ConstAddress+Constant or not. - bool IndexIsConstant; - - /// \brief Used for filtering out SCEV expressions with two or more AddRec - /// subexpressions. - /// - /// Used to filter out complicated SCEV expressions, having several AddRec - /// sub-expressions. We don't handle them, because unrolling one loop - /// would help to replace only one of these inductions with a constant, and - /// consequently, the expression would remain non-constant. - bool HaveSeenAR; - - /// \brief If the SCEV expression becomes ConstAddress+Constant, this value - /// holds ConstAddress. Otherwise, it's nullptr. - Value *BaseAddress; - - /// \brief The loop, which we try to completely unroll. 
- const Loop *L; - - ScalarEvolution &SE; - - FindConstantPointers(const Loop *L, ScalarEvolution &SE) - : IndexIsConstant(true), HaveSeenAR(false), BaseAddress(nullptr), - L(L), SE(SE) {} - - /// Examine the given expression S and figure out, if it can be a part of an - /// expression, that could become a constant after the loop is unrolled. - /// The routine sets IndexIsConstant and HaveSeenAR according to the analysis - /// results. - /// \returns true if we need to examine subexpressions, and false otherwise. - bool follow(const SCEV *S) { - if (const SCEVUnknown *SC = dyn_cast<SCEVUnknown>(S)) { - // We've reached the leaf node of SCEV, it's most probably just a - // variable. - // If it's the only one SCEV-subexpression, then it might be a base - // address of an index expression. - // If we've already recorded base address, then just give up on this SCEV - // - it's too complicated. - if (BaseAddress) { - IndexIsConstant = false; - return false; - } - BaseAddress = SC->getValue(); - return false; - } - if (isa<SCEVConstant>(S)) - return false; - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - // If the current SCEV expression is AddRec, and its loop isn't the loop - // we are about to unroll, then we won't get a constant address after - // unrolling, and thus, won't be able to eliminate the load. - if (AR->getLoop() != L) { - IndexIsConstant = false; - return false; - } - // We don't handle multiple AddRecs here, so give up in this case. - if (HaveSeenAR) { - IndexIsConstant = false; - return false; - } - HaveSeenAR = true; - } - - // Continue traversal. - return true; - } - bool isDone() const { return !IndexIsConstant; } -}; -} // End anonymous namespace. - -namespace { -/// \brief A cache of SCEV results used to optimize repeated queries to SCEV on -/// the same set of instructions. -/// -/// The primary cost this saves is the cost of checking the validity of a SCEV -/// every time it is looked up. However, in some cases we can provide a reduced -/// and especially useful model for an instruction based upon SCEV that is -/// non-trivial to compute but more useful to clients. -class SCEVCache { -public: - /// \brief Struct to represent a GEP whose start and step are known fixed - /// offsets from a base address due to SCEV's analysis. - struct GEPDescriptor { - Value *BaseAddr = nullptr; - unsigned Start = 0; - unsigned Step = 0; - }; - - Optional<GEPDescriptor> getGEPDescriptor(GetElementPtrInst *GEP); - - SCEVCache(const Loop &L, ScalarEvolution &SE) : L(L), SE(SE) {} - -private: - const Loop &L; - ScalarEvolution &SE; - - SmallDenseMap<GetElementPtrInst *, GEPDescriptor> GEPDescriptors; -}; -} // End anonymous namespace. - -/// \brief Get a simplified descriptor for a GEP instruction. -/// -/// Where possible, this produces a simplified descriptor for a GEP instruction -/// using SCEV analysis of the containing loop. If this isn't possible, it -/// returns an empty optional. -/// -/// The model is a base address, an initial offset, and a per-iteration step. -/// This fits very common patterns of GEPs inside loops and is something we can -/// use to simulate the behavior of a particular iteration of a loop. -/// -/// This is a cached interface. The first call may do non-trivial work to -/// compute the result, but all subsequent calls will return a fast answer -/// based on a cached result. This includes caching negative results. 
-Optional<SCEVCache::GEPDescriptor> -SCEVCache::getGEPDescriptor(GetElementPtrInst *GEP) { - decltype(GEPDescriptors)::iterator It; - bool Inserted; - - std::tie(It, Inserted) = GEPDescriptors.insert({GEP, {}}); - - if (!Inserted) { - if (!It->second.BaseAddr) - return None; - - return It->second; - } - - // We've inserted a new record into the cache, so compute the GEP descriptor - // if possible. - Value *V = cast<Value>(GEP); - if (!SE.isSCEVable(V->getType())) - return None; - const SCEV *S = SE.getSCEV(V); - - // FIXME: It'd be nice if the worklist and set used by the - // SCEVTraversal could be re-used between loop iterations, but the - // interface doesn't support that. There is no way to clear the visited - // sets between uses. - FindConstantPointers Visitor(&L, SE); - SCEVTraversal<FindConstantPointers> T(Visitor); - - // Try to find (BaseAddress+Step+Offset) tuple. - // If succeeded, save it to the cache - it might help in folding - // loads. - T.visitAll(S); - if (!Visitor.IndexIsConstant || !Visitor.BaseAddress) - return None; - - const SCEV *BaseAddrSE = SE.getSCEV(Visitor.BaseAddress); - if (BaseAddrSE->getType() != S->getType()) - return None; - const SCEV *OffSE = SE.getMinusSCEV(S, BaseAddrSE); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OffSE); - - if (!AR) - return None; - - const SCEVConstant *StepSE = - dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE)); - const SCEVConstant *StartSE = dyn_cast<SCEVConstant>(AR->getStart()); - if (!StepSE || !StartSE) - return None; - - // Check and skip caching if doing so would require lots of bits to - // avoid overflow. - APInt Start = StartSE->getValue()->getValue(); - APInt Step = StepSE->getValue()->getValue(); - if (Start.getActiveBits() > 32 || Step.getActiveBits() > 32) - return None; - - // We found a cacheable SCEV model for the GEP. - It->second.BaseAddr = Visitor.BaseAddress; - It->second.Start = Start.getLimitedValue(); - It->second.Step = Step.getLimitedValue(); - return It->second; -} - -namespace { // This class is used to get an estimate of the optimization effects that we // could get from complete loop unrolling. It comes from the fact that some // loads might be replaced with concrete constant values and that could trigger @@ -446,17 +270,31 @@ namespace { class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> { typedef InstVisitor<UnrolledInstAnalyzer, bool> Base; friend class InstVisitor<UnrolledInstAnalyzer, bool>; + struct SimplifiedAddress { + Value *Base = nullptr; + ConstantInt *Offset = nullptr; + }; public: UnrolledInstAnalyzer(unsigned Iteration, DenseMap<Value *, Constant *> &SimplifiedValues, - SCEVCache &SC) - : Iteration(Iteration), SimplifiedValues(SimplifiedValues), SC(SC) {} + const Loop *L, ScalarEvolution &SE) + : Iteration(Iteration), SimplifiedValues(SimplifiedValues), L(L), SE(SE) { + IterationNumber = SE.getConstant(APInt(64, Iteration)); + } // Allow access to the initial visit method. using Base::visit; private: + /// \brief A cache of pointer bases and constant-folded offsets corresponding + /// to GEP (or derived from GEP) instructions. + /// + /// In order to find the base pointer one needs to perform non-trivial + /// traversal of the corresponding SCEV expression, so it's good to have the + /// results saved. + DenseMap<Value *, SimplifiedAddress> SimplifiedAddresses; + /// \brief Number of currently simulated iteration. /// /// If an expression is ConstAddress+Constant, then the Constant is @@ -464,18 +302,71 @@ private: /// SCEVGEPCache. 
unsigned Iteration; - // While we walk the loop instructions, we we build up and maintain a mapping - // of simplified values specific to this iteration. The idea is to propagate - // any special information we have about loads that can be replaced with - // constants after complete unrolling, and account for likely simplifications - // post-unrolling. + /// \brief SCEV expression corresponding to number of currently simulated + /// iteration. + const SCEV *IterationNumber; + + /// \brief A Value->Constant map for keeping values that we managed to + /// constant-fold on the given iteration. + /// + /// While we walk the loop instructions, we build up and maintain a mapping + /// of simplified values specific to this iteration. The idea is to propagate + /// any special information we have about loads that can be replaced with + /// constants after complete unrolling, and account for likely simplifications + /// post-unrolling. DenseMap<Value *, Constant *> &SimplifiedValues; - // We use a cache to wrap all our SCEV queries. - SCEVCache &SC; + const Loop *L; + ScalarEvolution &SE; + + /// \brief Try to simplify instruction \param I using its SCEV expression. + /// + /// The idea is that some AddRec expressions become constants, which then + /// could trigger folding of other instructions. However, that only happens + /// for expressions whose start value is also constant, which isn't always the + /// case. In another common and important case the start value is just some + /// address (i.e. SCEVUnknown) - in this case we compute the offset and save + /// it along with the base address instead. + bool simplifyInstWithSCEV(Instruction *I) { + if (!SE.isSCEVable(I->getType())) + return false; + + const SCEV *S = SE.getSCEV(I); + if (auto *SC = dyn_cast<SCEVConstant>(S)) { + SimplifiedValues[I] = SC->getValue(); + return true; + } + + auto *AR = dyn_cast<SCEVAddRecExpr>(S); + if (!AR) + return false; + + const SCEV *ValueAtIteration = AR->evaluateAtIteration(IterationNumber, SE); + // Check if the AddRec expression becomes a constant. + if (auto *SC = dyn_cast<SCEVConstant>(ValueAtIteration)) { + SimplifiedValues[I] = SC->getValue(); + return true; + } + + // Check if the offset from the base address becomes a constant. + auto *Base = dyn_cast<SCEVUnknown>(SE.getPointerBase(S)); + if (!Base) + return false; + auto *Offset = + dyn_cast<SCEVConstant>(SE.getMinusSCEV(ValueAtIteration, Base)); + if (!Offset) + return false; + SimplifiedAddress Address; + Address.Base = Base->getValue(); + Address.Offset = Offset->getValue(); + SimplifiedAddresses[I] = Address; + return true; + } /// Base case for the instruction visitor. - bool visitInstruction(Instruction &I) { return false; }; + bool visitInstruction(Instruction &I) { + return simplifyInstWithSCEV(&I); + } /// TODO: Add visitors for other instruction types, e.g. ZExt, SExt. @@ -492,6 +383,7 @@ private: if (!isa<Constant>(RHS)) if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; + Value *SimpleV = nullptr; const DataLayout &DL = I.getModule()->getDataLayout(); if (auto FI = dyn_cast<FPMathOperator>(&I)) @@ -503,24 +395,21 @@ private: if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) SimplifiedValues[&I] = C; - return SimpleV; + if (SimpleV) + return true; + return Base::visitBinaryOperator(I); } /// Try to fold load I. 
bool visitLoad(LoadInst &I) { Value *AddrOp = I.getPointerOperand(); - if (!isa<Constant>(AddrOp)) - if (Constant *SimplifiedAddrOp = SimplifiedValues.lookup(AddrOp)) - AddrOp = SimplifiedAddrOp; - auto *GEP = dyn_cast<GetElementPtrInst>(AddrOp); - if (!GEP) - return false; - auto OptionalGEPDesc = SC.getGEPDescriptor(GEP); - if (!OptionalGEPDesc) + auto AddressIt = SimplifiedAddresses.find(AddrOp); + if (AddressIt == SimplifiedAddresses.end()) return false; + ConstantInt *SimplifiedAddrOp = AddressIt->second.Offset; - auto GV = dyn_cast<GlobalVariable>(OptionalGEPDesc->BaseAddr); + auto *GV = dyn_cast<GlobalVariable>(AddressIt->second.Base); // We're only interested in loads that can be completely folded to a // constant. if (!GV || !GV->hasInitializer()) @@ -531,13 +420,10 @@ private: if (!CDS) return false; - // This calculation should never overflow because we bound Iteration quite - // low and both the start and step are 32-bit integers. We use signed - // integers so that UBSan will catch if a bug sneaks into the code. int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U; - int64_t Index = ((int64_t)OptionalGEPDesc->Start + - (int64_t)OptionalGEPDesc->Step * (int64_t)Iteration) / - ElemSize; + assert(SimplifiedAddrOp->getValue().getActiveBits() < 64 && + "Unexpectedly large index value."); + int64_t Index = SimplifiedAddrOp->getSExtValue() / ElemSize; if (Index >= CDS->getNumElements()) { // FIXME: For now we conservatively ignore out of bound accesses, but // we're allowed to perform the optimization in this case. @@ -556,11 +442,12 @@ private: namespace { struct EstimatedUnrollCost { - /// \brief Count the number of optimized instructions. - unsigned NumberOfOptimizedInstructions; + /// \brief The estimated cost after unrolling. + unsigned UnrolledCost; - /// \brief Count the total number of instructions. - unsigned UnrolledLoopSize; + /// \brief The estimated dynamic cost of executing the instructions in the + /// rolled form. + unsigned RolledDynamicCost; }; } @@ -593,12 +480,15 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, SmallSetVector<BasicBlock *, 16> BBWorklist; DenseMap<Value *, Constant *> SimplifiedValues; - // Use a cache to access SCEV expressions so that we don't pay the cost on - // each iteration. This cache is lazily self-populating. - SCEVCache SC(*L, SE); - - unsigned NumberOfOptimizedInstructions = 0; - unsigned UnrolledLoopSize = 0; + // The estimated cost of the unrolled form of the loop. We try to estimate + // this by simplifying as much as we can while computing the estimate. + unsigned UnrolledCost = 0; + // We also track the estimated dynamic (that is, actually executed) cost in + // the rolled form. This helps identify cases when the savings from unrolling + // aren't just exposing dead control flows, but actual reduced dynamic + // instructions due to the simplifications which we expect to occur after + // unrolling. + unsigned RolledDynamicCost = 0; // Simulate execution of each iteration of the loop counting instructions, // which would be simplified. @@ -606,7 +496,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, // we literally have to go through all loop's iterations. 
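// A standalone sketch (hypothetical names) of the per-instruction
// accounting the simulation below performs: every visited instruction adds
// its cost to the rolled (dynamic) total, only instructions the analyzer
// fails to simplify add to the unrolled total, and the walk bails out once
// the unrolled total exceeds the size budget.
struct CostTotalsModel {
  unsigned UnrolledCost = 0;
  unsigned RolledDynamicCost = 0;
};

// Returns false when the budget is exhausted and analysis should give up.
static bool accountInstructionModel(CostTotalsModel &T, unsigned InstCost,
                                    bool Simplified,
                                    unsigned MaxUnrolledSize) {
  T.RolledDynamicCost += InstCost;
  if (!Simplified)
    T.UnrolledCost += InstCost;
  return T.UnrolledCost <= MaxUnrolledSize;
}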
 for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
   SimplifiedValues.clear();
-  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SC);
+  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE);
 
   BBWorklist.clear();
   BBWorklist.insert(L->getHeader());
@@ -618,17 +508,20 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
       // it.  We don't change the actual IR, just count optimization
       // opportunities.
       for (Instruction &I : *BB) {
-        UnrolledLoopSize += TTI.getUserCost(&I);
+        unsigned InstCost = TTI.getUserCost(&I);
 
         // Visit the instruction to analyze its loop cost after unrolling,
-        // and if the visitor returns true, then we can optimize this
-        // instruction away.
-        if (Analyzer.visit(I))
-          NumberOfOptimizedInstructions += TTI.getUserCost(&I);
+        // and if the visitor returns false, include this instruction in the
+        // unrolled cost.
+        if (!Analyzer.visit(I))
+          UnrolledCost += InstCost;
+
+        // Also track this instruction's expected cost when executing the
+        // rolled loop form.
+        RolledDynamicCost += InstCost;
 
         // If unrolled body turns out to be too big, bail out.
-        if (UnrolledLoopSize - NumberOfOptimizedInstructions >
-            MaxUnrolledLoopSize)
+        if (UnrolledCost > MaxUnrolledLoopSize)
          return None;
       }
 
@@ -640,10 +533,10 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
 
     // If we found no optimization opportunities on the first iteration, we
     // won't find them on later ones too.
-    if (!NumberOfOptimizedInstructions)
+    if (UnrolledCost == RolledDynamicCost)
       return None;
   }
-  return {{NumberOfOptimizedInstructions, UnrolledLoopSize}};
+  return {{UnrolledCost, RolledDynamicCost}};
 }
 
 /// ApproximateLoopSize - Approximate the size of the loop.
@@ -749,46 +642,56 @@ static void SetLoopAlreadyUnrolled(Loop *L) {
   L->setLoopID(NewLoopID);
 }
 
-bool LoopUnroll::canUnrollCompletely(
-    Loop *L, unsigned Threshold, unsigned AbsoluteThreshold,
-    uint64_t UnrolledSize, unsigned NumberOfOptimizedInstructions,
-    unsigned PercentOfOptimizedForCompleteUnroll) {
+bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold,
+                                     unsigned PercentDynamicCostSavedThreshold,
+                                     unsigned DynamicCostSavingsDiscount,
+                                     uint64_t UnrolledCost,
+                                     uint64_t RolledDynamicCost) {
   if (Threshold == NoThreshold) {
     DEBUG(dbgs() << "  Can fully unroll, because no threshold is set.\n");
     return true;
   }
 
-  if (UnrolledSize <= Threshold) {
-    DEBUG(dbgs() << "  Can fully unroll, because unrolled size: "
-                 << UnrolledSize << "<" << Threshold << "\n");
+  if (UnrolledCost <= Threshold) {
+    DEBUG(dbgs() << "  Can fully unroll, because unrolled cost: "
+                 << UnrolledCost << "<" << Threshold << "\n");
     return true;
   }
 
-  assert(UnrolledSize && "UnrolledSize can't be 0 at this point.");
-  unsigned PercentOfOptimizedInstructions =
-      (uint64_t)NumberOfOptimizedInstructions * 100ull / UnrolledSize;
-
-  if (UnrolledSize <= AbsoluteThreshold &&
-      PercentOfOptimizedInstructions >= PercentOfOptimizedForCompleteUnroll) {
-    DEBUG(dbgs() << "  Can fully unroll, because unrolling will help removing "
-                 << PercentOfOptimizedInstructions
-                 << "% instructions (threshold: "
-                 << PercentOfOptimizedForCompleteUnroll << "%)\n");
-    DEBUG(dbgs() << "  Unrolled size (" << UnrolledSize
-                 << ") is less than the threshold (" << AbsoluteThreshold
-                 << ").\n");
+  assert(UnrolledCost && "UnrolledCost can't be 0 at this point.");
+  assert(RolledDynamicCost >= UnrolledCost &&
+         "Cannot have a higher unrolled cost than a rolled cost!");
+
+  // Compute the percentage of the dynamic cost in the rolled 
form that is + // saved when unrolled. If unrolling dramatically reduces the estimated + // dynamic cost of the loop, we use a higher threshold to allow more + // unrolling. + unsigned PercentDynamicCostSaved = + (uint64_t)(RolledDynamicCost - UnrolledCost) * 100ull / RolledDynamicCost; + + if (PercentDynamicCostSaved >= PercentDynamicCostSavedThreshold && + (int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <= + (int64_t)Threshold) { + DEBUG(dbgs() << " Can fully unroll, because unrolling will reduce the " + "expected dynamic cost by " << PercentDynamicCostSaved + << "% (threshold: " << PercentDynamicCostSavedThreshold + << "%)\n" + << " and the unrolled cost (" << UnrolledCost + << ") is less than the max threshold (" + << DynamicCostSavingsDiscount << ").\n"); return true; } DEBUG(dbgs() << " Too large to fully unroll:\n"); - DEBUG(dbgs() << " Unrolled size: " << UnrolledSize << "\n"); - DEBUG(dbgs() << " Estimated number of optimized instructions: " - << NumberOfOptimizedInstructions << "\n"); - DEBUG(dbgs() << " Absolute threshold: " << AbsoluteThreshold << "\n"); - DEBUG(dbgs() << " Minimum percent of removed instructions: " - << PercentOfOptimizedForCompleteUnroll << "\n"); - DEBUG(dbgs() << " Threshold for small loops: " << Threshold << "\n"); + DEBUG(dbgs() << " Threshold: " << Threshold << "\n"); + DEBUG(dbgs() << " Max threshold: " << DynamicCostSavingsDiscount << "\n"); + DEBUG(dbgs() << " Percent cost saved threshold: " + << PercentDynamicCostSavedThreshold << "%\n"); + DEBUG(dbgs() << " Unrolled cost: " << UnrolledCost << "\n"); + DEBUG(dbgs() << " Rolled dynamic cost: " << RolledDynamicCost << "\n"); + DEBUG(dbgs() << " Percent cost saved: " << PercentDynamicCostSaved + << "\n"); return false; } @@ -899,9 +802,11 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } unsigned Threshold, PartialThreshold; - unsigned AbsoluteThreshold, PercentOfOptimizedForCompleteUnroll; + unsigned PercentDynamicCostSavedThreshold; + unsigned DynamicCostSavingsDiscount; selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold, - AbsoluteThreshold, PercentOfOptimizedForCompleteUnroll); + PercentDynamicCostSavedThreshold, + DynamicCostSavingsDiscount); // Given Count, TripCount and thresholds determine the type of // unrolling which is to be performed. @@ -910,20 +815,18 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (TripCount && Count == TripCount) { Unrolling = Partial; // If the loop is really small, we don't need to run an expensive analysis. - if (canUnrollCompletely( - L, Threshold, AbsoluteThreshold, - UnrolledSize, 0, 100)) { + if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount, + UnrolledSize, UnrolledSize)) { Unrolling = Full; } else { // The loop isn't that small, but we still can fully unroll it if that // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. 
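Pulled out of the pass, the acceptance test above is two comparisons: the relative dynamic saving must clear the percentage threshold, and the unrolled cost minus the earned discount must still fit under the size threshold. A simplified model with worked numbers (a sketch, not the pass itself):

#include <cassert>
#include <cstdint>

// Simplified model of the cost test. With RolledDynamicCost = 1000 and
// UnrolledCost = 300, PercentSaved is 70; with a 50% threshold and a
// discount of 400, an unrolled cost of up to Threshold + 400 is accepted.
static bool acceptFullUnroll(uint64_t UnrolledCost, uint64_t RolledDynamicCost,
                             uint64_t Threshold, unsigned PercentSavedThreshold,
                             uint64_t Discount) {
  assert(RolledDynamicCost >= UnrolledCost && "unrolling cannot add cost here");
  unsigned PercentSaved = static_cast<unsigned>(
      (RolledDynamicCost - UnrolledCost) * 100 / RolledDynamicCost);
  return PercentSaved >= PercentSavedThreshold &&
         static_cast<int64_t>(UnrolledCost) - static_cast<int64_t>(Discount) <=
             static_cast<int64_t>(Threshold);
}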
- if (Optional<EstimatedUnrollCost> Cost = - analyzeLoopUnrollCost(L, TripCount, *SE, TTI, AbsoluteThreshold)) - if (canUnrollCompletely(L, Threshold, AbsoluteThreshold, - Cost->UnrolledLoopSize, - Cost->NumberOfOptimizedInstructions, - PercentOfOptimizedForCompleteUnroll)) { + if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost( + L, TripCount, *SE, TTI, Threshold + DynamicCostSavingsDiscount)) + if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold, + DynamicCostSavingsDiscount, Cost->UnrolledCost, + Cost->RolledDynamicCost)) { Unrolling = Full; } } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 66d6ac6f3a09..2bdf670f67e3 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -510,7 +510,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // Check that nothing touches the dest of the "copy" between // the call and the store. AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - AliasAnalysis::Location StoreLoc = AA.getLocation(SI); + AliasAnalysis::Location StoreLoc = MemoryLocation::get(SI); for (BasicBlock::iterator I = --BasicBlock::iterator(SI), E = C; I != E; --I) { if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) { @@ -802,9 +802,8 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) { // // NOTE: This is conservative, it will stop on any read from the source loc, // not just the defining memcpy. - MemDepResult SourceDep = - MD->getPointerDependencyFrom(AA.getLocationForSource(MDep), - false, M, M->getParent()); + MemDepResult SourceDep = MD->getPointerDependencyFrom( + MemoryLocation::getForSource(MDep), false, M, M->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; @@ -812,7 +811,8 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) { // source and dest might overlap. We still want to eliminate the intermediate // value, but we have to generate a memmove instead of memcpy. bool UseMemMove = false; - if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep))) + if (!AA.isNoAlias(MemoryLocation::getForDest(M), + MemoryLocation::getForSource(MDep))) UseMemMove = true; // If all checks passed, then we can transform M. @@ -860,9 +860,8 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy, return false; // Check that there are no other dependencies on the memset destination. - MemDepResult DstDepInfo = - MD->getPointerDependencyFrom(AliasAnalysis::getLocationForDest(MemSet), - false, MemCpy, MemCpy->getParent()); + MemDepResult DstDepInfo = MD->getPointerDependencyFrom( + MemoryLocation::getForDest(MemSet), false, MemCpy, MemCpy->getParent()); if (DstDepInfo.getInst() != MemSet) return false; @@ -998,7 +997,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { } } - AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M); + AliasAnalysis::Location SrcLoc = MemoryLocation::getForSource(M); MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true, M, M->getParent()); @@ -1047,7 +1046,8 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { return false; // See if the pointers alias. 
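All of these MemCpyOpt call sites follow the same mechanical migration: memory locations are now built through the static MemoryLocation factories instead of AliasAnalysis member helpers. The new shape in isolation (the exact headers are an assumption; at this revision AliasAnalysis::Location and MemoryLocation are interchangeable in these calls):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

AliasAnalysis::Location storeLoc(StoreInst *SI) {
  return MemoryLocation::get(SI);           // was AA.getLocation(SI)
}
AliasAnalysis::Location memcpySource(MemTransferInst *MTI) {
  return MemoryLocation::getForSource(MTI); // was AA.getLocationForSource(MTI)
}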
- if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) + if (!AA.isNoAlias(MemoryLocation::getForDest(M), + MemoryLocation::getForSource(M))) return false; DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); @@ -1121,8 +1121,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { // NOTE: This is conservative, it will stop on any read from the source loc, // not just the defining memcpy. MemDepResult SourceDep = - MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep), - false, CS.getInstruction(), MDep->getParent()); + MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, + CS.getInstruction(), MDep->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 611a941b0b21..776dfb4d487f 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -241,7 +241,7 @@ bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) { bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start, const Instruction& End, LoadInst* LI) { - AliasAnalysis::Location Loc = AA->getLocation(LI); + AliasAnalysis::Location Loc = MemoryLocation::get(LI); return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod); } @@ -266,8 +266,8 @@ LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1, LoadInst *Load1 = dyn_cast<LoadInst>(Inst); BasicBlock *BB0 = Load0->getParent(); - AliasAnalysis::Location Loc0 = AA->getLocation(Load0); - AliasAnalysis::Location Loc1 = AA->getLocation(Load1); + AliasAnalysis::Location Loc0 = MemoryLocation::get(Load0); + AliasAnalysis::Location Loc1 = MemoryLocation::get(Load1); if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) && !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) && !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) { @@ -425,8 +425,8 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1, StoreInst *Store1 = cast<StoreInst>(Inst); - AliasAnalysis::Location Loc0 = AA->getLocation(Store0); - AliasAnalysis::Location Loc1 = AA->getLocation(Store1); + AliasAnalysis::Location Loc0 = MemoryLocation::get(Store0); + AliasAnalysis::Location Loc1 = MemoryLocation::get(Store1); if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) && !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))), BB1->back(), Loc1) && diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index 5b370e04088f..4cf68b00da0a 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -234,6 +234,7 @@ bool NaryReassociate::doOneIteration(Function &F) { BasicBlock *BB = Node->getBlock(); for (auto I = BB->begin(); I != BB->end(); ++I) { if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(I)) { + const SCEV *OldSCEV = SE->getSCEV(I); if (Instruction *NewI = tryReassociate(I)) { Changed = true; SE->forgetValue(I); @@ -243,7 +244,28 @@ bool NaryReassociate::doOneIteration(Function &F) { } // Add the rewritten instruction to SeenExprs; the original instruction // is deleted. - SeenExprs[SE->getSCEV(I)].push_back(I); + const SCEV *NewSCEV = SE->getSCEV(I); + SeenExprs[NewSCEV].push_back(I); + // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I) + // is equivalent to I. 
However, ScalarEvolution::getSCEV may + // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose + // we reassociate + // I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4 + // to + // NewI = &a[sext(i)] + sext(j). + // + // ScalarEvolution computes + // getSCEV(I) = a + 4 * sext(i + j) + // getSCEV(newI) = a + 4 * sext(i) + 4 * sext(j) + // which are different SCEVs. + // + // To alleviate this issue of ScalarEvolution not always capturing + // equivalence, we add I to SeenExprs[OldSCEV] as well so that we can + // map both SCEV before and after tryReassociate(I) to I. + // + // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll. + if (NewSCEV != OldSCEV) + SeenExprs[OldSCEV].push_back(I); } } } @@ -295,8 +317,10 @@ static bool isGEPFoldable(GetElementPtrInst *GEP, BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field); } } + + unsigned AddrSpace = GEP->getPointerAddressSpace(); return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV, - BaseOffset, HasBaseReg, Scale); + BaseOffset, HasBaseReg, Scale, AddrSpace); } Instruction *NaryReassociate::tryReassociateGEP(GetElementPtrInst *GEP) { diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp index 3e7deeba9f21..9ecaf102574a 100644 --- a/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -496,7 +496,7 @@ template <typename T> static void unique_unsorted(std::vector<T> &vec) { } } -static std::string GCSafepointPollName("gc.safepoint_poll"); +static const char *const GCSafepointPollName = "gc.safepoint_poll"; static bool isGCSafepointPoll(Function &F) { return F.getName().equals(GCSafepointPollName); diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index b677523d7032..6c66b58729e9 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -733,7 +733,7 @@ static bool LinearizeExprTree(BinaryOperator *I, if (Ops.empty()) { Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType()); assert(Identity && "Associative operation without identity!"); - Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1))); + Ops.emplace_back(Identity, APInt(Bitwidth, 1)); } return Changed; @@ -1966,38 +1966,35 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) { if (!I->hasOneUse() || I->getType()->isVectorTy()) return nullptr; - // Must be a mul, fmul, or fdiv instruction. + // Must be a fmul or fdiv instruction. unsigned Opcode = I->getOpcode(); - if (Opcode != Instruction::Mul && Opcode != Instruction::FMul && - Opcode != Instruction::FDiv) + if (Opcode != Instruction::FMul && Opcode != Instruction::FDiv) return nullptr; - // Must have at least one constant operand. - Constant *C0 = dyn_cast<Constant>(I->getOperand(0)); - Constant *C1 = dyn_cast<Constant>(I->getOperand(1)); - if (!C0 && !C1) + auto *C0 = dyn_cast<ConstantFP>(I->getOperand(0)); + auto *C1 = dyn_cast<ConstantFP>(I->getOperand(1)); + + // Both operands are constant, let it get constant folded away. + if (C0 && C1) return nullptr; - // Must be a negative ConstantInt or ConstantFP. - Constant *C = C0 ? C0 : C1; - unsigned ConstIdx = C0 ? 0 : 1; - if (auto *CI = dyn_cast<ConstantInt>(C)) { - if (!CI->isNegative() || CI->isMinValue(true)) - return nullptr; - } else if (auto *CF = dyn_cast<ConstantFP>(C)) { - if (!CF->isNegative()) - return nullptr; - } else + ConstantFP *CF = C0 ? C0 : C1; + + // Must have one constant operand. 
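The nsw caveat in the NaryReassociate comment above can be made concrete with plain integers: once the 32-bit add is allowed to wrap, sign-extending the sum differs from summing the sign-extensions. A standalone demo, where unsigned wraparound stands in for the dropped nsw:

#include <cstdint>
#include <cstdio>

// sext(i + j) vs. sext(i) + sext(j) when i + j wraps in 32 bits. The
// int32_t conversion below is the usual two's-complement wrap on all
// common implementations.
int main() {
  int32_t i = INT32_MAX, j = 1;
  int32_t wrapped = (int32_t)((uint32_t)i + (uint32_t)j); // INT32_MIN
  int64_t sextOfAdd = (int64_t)wrapped;                   // -2147483648
  int64_t addOfSext = (int64_t)i + (int64_t)j;            //  2147483648
  std::printf("%lld %lld\n", (long long)sextOfAdd, (long long)addOfSext);
}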
+ if (!CF) + return nullptr; + + // Must be a negative ConstantFP. + if (!CF->isNegative()) return nullptr; // User must be a binary operator with one or more uses. Instruction *User = I->user_back(); - if (!isa<BinaryOperator>(User) || !User->getNumUses()) + if (!isa<BinaryOperator>(User) || !User->hasNUsesOrMore(1)) return nullptr; unsigned UserOpcode = User->getOpcode(); - if (UserOpcode != Instruction::Add && UserOpcode != Instruction::FAdd && - UserOpcode != Instruction::Sub && UserOpcode != Instruction::FSub) + if (UserOpcode != Instruction::FAdd && UserOpcode != Instruction::FSub) return nullptr; // Subtraction is not commutative. Explicitly, the following transform is @@ -2006,14 +2003,9 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) { return nullptr; // Change the sign of the constant. - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) - I->setOperand(ConstIdx, ConstantInt::get(CI->getContext(), -CI->getValue())); - else { - ConstantFP *CF = cast<ConstantFP>(C); - APFloat Val = CF->getValueAPF(); - Val.changeSign(); - I->setOperand(ConstIdx, ConstantFP::get(CF->getContext(), Val)); - } + APFloat Val = CF->getValueAPF(); + Val.changeSign(); + I->setOperand(C0 ? 0 : 1, ConstantFP::get(CF->getContext(), Val)); // Canonicalize I to RHS to simplify the next bit of logic. E.g., // ((-Const*y) + x) -> (x + (-Const*y)). @@ -2023,15 +2015,9 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) { Value *Op0 = User->getOperand(0); Value *Op1 = User->getOperand(1); BinaryOperator *NI; - switch(UserOpcode) { + switch (UserOpcode) { default: llvm_unreachable("Unexpected Opcode!"); - case Instruction::Add: - NI = BinaryOperator::CreateSub(Op0, Op1); - break; - case Instruction::Sub: - NI = BinaryOperator::CreateAdd(Op0, Op1); - break; case Instruction::FAdd: NI = BinaryOperator::CreateFSub(Op0, Op1); NI->setFastMathFlags(cast<FPMathOperator>(User)->getFastMathFlags()); diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 6cf765a8438c..6f6ba72c6e6f 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" @@ -74,13 +75,27 @@ static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live", cl::Hidden); namespace { -struct RewriteStatepointsForGC : public FunctionPass { +struct RewriteStatepointsForGC : public ModulePass { static char ID; // Pass identification, replacement for typeid - RewriteStatepointsForGC() : FunctionPass(ID) { + RewriteStatepointsForGC() : ModulePass(ID) { initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry()); } - bool runOnFunction(Function &F) override; + bool runOnFunction(Function &F); + bool runOnModule(Module &M) override { + bool Changed = false; + for (Function &F : M) + Changed |= runOnFunction(F); + + if (Changed) { + // stripDereferenceabilityInfo asserts that shouldRewriteStatepointsIn + // returns true for at least one function in the module. Since at least + // one function changed, we know that the precondition is satisfied. 
+ stripDereferenceabilityInfo(M); + } + + return Changed; + } void getAnalysisUsage(AnalysisUsage &AU) const override { // We add and rewrite a bunch of instructions, but don't really do much @@ -88,12 +103,26 @@ struct RewriteStatepointsForGC : public FunctionPass { AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); } + + /// The IR fed into RewriteStatepointsForGC may have had attributes implying + /// dereferenceability that are no longer valid/correct after + /// RewriteStatepointsForGC has run. This is because semantically, after + /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire + /// heap. stripDereferenceabilityInfo (conservatively) restores correctness + /// by erasing all attributes in the module that externally imply + /// dereferenceability. + /// + void stripDereferenceabilityInfo(Module &M); + + // Helpers for stripDereferenceabilityInfo + void stripDereferenceabilityInfoFromBody(Function &F); + void stripDereferenceabilityInfoFromPrototype(Function &F); }; } // namespace char RewriteStatepointsForGC::ID = 0; -FunctionPass *llvm::createRewriteStatepointsForGCPass() { +ModulePass *llvm::createRewriteStatepointsForGCPass() { return new RewriteStatepointsForGC(); } @@ -1031,14 +1060,11 @@ static void recomputeLiveInValues( // goes through the statepoint. We might need to split an edge to make this // possible. static BasicBlock * -normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) { - DominatorTree *DT = nullptr; - if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) - DT = &DTP->getDomTree(); - +normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, + DominatorTree &DT) { BasicBlock *Ret = BB; if (!BB->getUniquePredecessor()) { - Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT); + Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, &DT); } // Now that 'ret' has unique predecessor we can safely remove all phi nodes @@ -2016,9 +2042,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, continue; InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction()); normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(), - P); + DT); normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(), - P); + DT); } // A list of dummy calls added to the IR to keep various values obviously @@ -2197,6 +2223,72 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, return !records.empty(); } +// Handles both return values and arguments for Functions and CallSites. 
+template <typename AttrHolder> +static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH, + unsigned Index) { + AttrBuilder R; + if (AH.getDereferenceableBytes(Index)) + R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable, + AH.getDereferenceableBytes(Index))); + if (AH.getDereferenceableOrNullBytes(Index)) + R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull, + AH.getDereferenceableOrNullBytes(Index))); + + if (!R.empty()) + AH.setAttributes(AH.getAttributes().removeAttributes( + Ctx, Index, AttributeSet::get(Ctx, Index, R))); +} + +void +RewriteStatepointsForGC::stripDereferenceabilityInfoFromPrototype(Function &F) { + LLVMContext &Ctx = F.getContext(); + + for (Argument &A : F.args()) + if (isa<PointerType>(A.getType())) + RemoveDerefAttrAtIndex(Ctx, F, A.getArgNo() + 1); + + if (isa<PointerType>(F.getReturnType())) + RemoveDerefAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex); +} + +void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) { + if (F.empty()) + return; + + LLVMContext &Ctx = F.getContext(); + MDBuilder Builder(Ctx); + + for (Instruction &I : inst_range(F)) { + if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) { + assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!"); + bool IsImmutableTBAA = + MD->getNumOperands() == 4 && + mdconst::extract<ConstantInt>(MD->getOperand(3))->getValue() == 1; + + if (!IsImmutableTBAA) + continue; // no work to do, MD_tbaa is already marked mutable + + MDNode *Base = cast<MDNode>(MD->getOperand(0)); + MDNode *Access = cast<MDNode>(MD->getOperand(1)); + uint64_t Offset = + mdconst::extract<ConstantInt>(MD->getOperand(2))->getZExtValue(); + + MDNode *MutableTBAA = + Builder.createTBAAStructTagNode(Base, Access, Offset); + I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA); + } + + if (CallSite CS = CallSite(&I)) { + for (int i = 0, e = CS.arg_size(); i != e; i++) + if (isa<PointerType>(CS.getArgument(i)->getType())) + RemoveDerefAttrAtIndex(Ctx, CS, i + 1); + if (isa<PointerType>(CS.getType())) + RemoveDerefAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex); + } + } +} + /// Returns true if this function should be rewritten by this pass. The main /// point of this function is as an extension point for custom logic. static bool shouldRewriteStatepointsIn(Function &F) { @@ -2211,6 +2303,19 @@ static bool shouldRewriteStatepointsIn(Function &F) { return false; } +void RewriteStatepointsForGC::stripDereferenceabilityInfo(Module &M) { +#ifndef NDEBUG + assert(std::any_of(M.begin(), M.end(), shouldRewriteStatepointsIn) && + "precondition!"); +#endif + + for (Function &F : M) + stripDereferenceabilityInfoFromPrototype(F); + + for (Function &F : M) + stripDereferenceabilityInfoFromBody(F); +} + bool RewriteStatepointsForGC::runOnFunction(Function &F) { // Nothing to do for declarations. if (F.isDeclaration() || F.empty()) @@ -2221,7 +2326,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) { if (!shouldRewriteStatepointsIn(F)) return false; - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); // Gather all the statepoints which need rewritten. 
Be careful to only // consider those in reachable code since we need to ask dominance queries diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 3a782d159dab..4a875311881a 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -852,9 +852,11 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { TargetTransformInfo &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI( *GEP->getParent()->getParent()); + unsigned AddrSpace = GEP->getPointerAddressSpace(); if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(), /*BaseGV=*/nullptr, AccumulativeByteOffset, - /*HasBaseReg=*/true, /*Scale=*/0)) { + /*HasBaseReg=*/true, /*Scale=*/0, + AddrSpace)) { return Changed; } } diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 8566cd9736d3..f0e3ffdb95ac 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -193,11 +193,18 @@ namespace { struct CFGSimplifyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid unsigned BonusInstThreshold; - CFGSimplifyPass(int T = -1) : FunctionPass(ID) { + std::function<bool(const Function &)> PredicateFtor; + + CFGSimplifyPass(int T = -1, + std::function<bool(const Function &)> Ftor = nullptr) + : FunctionPass(ID), PredicateFtor(Ftor) { BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : unsigned(T); initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { + if (PredicateFtor && !PredicateFtor(F)) + return false; + if (skipOptnoneFunction(F)) return false; @@ -224,7 +231,9 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, false) // Public interface to the CFGSimplification pass -FunctionPass *llvm::createCFGSimplificationPass(int Threshold) { - return new CFGSimplifyPass(Threshold); +FunctionPass * +llvm::createCFGSimplificationPass(int Threshold, + std::function<bool(const Function &)> Ftor) { + return new CFGSimplifyPass(Threshold, Ftor); } diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index b169d5612f00..078c6a921a08 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -163,7 +163,7 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, } if (LoadInst *L = dyn_cast<LoadInst>(Inst)) { - AliasAnalysis::Location Loc = AA->getLocation(L); + AliasAnalysis::Location Loc = MemoryLocation::get(L); for (Instruction *S : Stores) if (AA->getModRefInfo(S, Loc) & AliasAnalysis::Mod) return false; @@ -172,6 +172,12 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst)) return false; + // Convergent operations can only be moved to control equivalent blocks. 
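Stepping back to the SimplifyCFG change above: the new std::function parameter lets a pass-manager builder gate the pass per function. One possible use, assuming only the new createCFGSimplificationPass signature from this patch (the minsize policy is illustrative):

#include "llvm/IR/Function.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;

// Only simplify functions tagged minsize; every other function is skipped
// by the predicate before any work is done.
FunctionPass *createMinSizeOnlySimplifyCFG() {
  return createCFGSimplificationPass(-1, [](const Function &F) {
    return F.hasFnAttribute(Attribute::MinSize);
  });
}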
+ if (auto CS = CallSite(Inst)) { + if (CS.hasFnAttr(Attribute::Convergent)) + return false; + } + return true; } diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index a5890c0adb06..5f25e6b2cb6f 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -491,6 +491,9 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, const DataLayout &DL = Phi->getModule()->getDataLayout(); int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); + if (!Size) + return false; + int64_t CVSize = CV->getSExtValue(); if (CVSize % Size) return false; diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 623dbc9f42ce..a87f8504bfb5 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -636,7 +636,7 @@ void PromoteMem2Reg::run() { // and inserting the phi nodes we marked as necessary // std::vector<RenamePassData> RenamePassWorkList; - RenamePassWorkList.push_back(RenamePassData(F.begin(), nullptr, Values)); + RenamePassWorkList.emplace_back(F.begin(), nullptr, std::move(Values)); do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); @@ -973,7 +973,7 @@ NextIteration: for (; I != E; ++I) if (VisitedSuccs.insert(*I).second) - Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); + Worklist.emplace_back(*I, Pred, IncomingVals); goto NextIteration; } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 3757a801d645..ab30aa17c76b 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -141,7 +141,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) ++NumElimOperand; Changed = true; if (IVOperand->use_empty()) - DeadInsts.push_back(IVOperand); + DeadInsts.emplace_back(IVOperand); return IVSrc; } @@ -178,7 +178,7 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); ++NumElimCmp; Changed = true; - DeadInsts.push_back(ICmp); + DeadInsts.emplace_back(ICmp); } /// SimplifyIVUsers helper for eliminating useless @@ -229,7 +229,7 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); ++NumElimRem; Changed = true; - DeadInsts.push_back(Rem); + DeadInsts.emplace_back(Rem); } /// Eliminate an operation that consumes a simple IV and has @@ -260,7 +260,7 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, UseInst->replaceAllUsesWith(IVOperand); ++NumElimIdentity; Changed = true; - DeadInsts.push_back(UseInst); + DeadInsts.emplace_back(UseInst); return true; } @@ -386,7 +386,7 @@ Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser, "Bad add instruction created from overflow intrinsic."); AddVal->replaceAllUsesWith(AddInst); - DeadInsts.push_back(AddVal); + DeadInsts.emplace_back(AddVal); return AddInst; } diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index cac80accc32f..8c72641da9e7 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -400,8 +400,11 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, } if (auto *AI = dyn_cast<AllocaInst>(I)) AI->setAllocatedType(TypeMapper->remapType(AI->getAllocatedType())); - if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) + if 
(auto *GEP = dyn_cast<GetElementPtrInst>(I)) { GEP->setSourceElementType( TypeMapper->remapType(GEP->getSourceElementType())); + GEP->setResultElementType( + TypeMapper->remapType(GEP->getResultElementType())); + } I->mutateType(TypeMapper->remapType(I->getType())); } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 011fd0f6fa88..95c9381985ab 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -34,6 +34,10 @@ // Variable uniformity checks are inspired by: // Karrenberg, R. and Hack, S. Whole Function Vectorization. // +// The interleaved access vectorization is based on the paper: +// Dorit Nuzman, Ira Rosen and Ayal Zaks. Auto-Vectorization of Interleaved +// Data for SIMD +// // Other ideas/concepts are from: // A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. // @@ -134,6 +138,16 @@ static cl::opt<bool> EnableMemAccessVersioning( "enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symblic stride memory access versioning")); +static cl::opt<bool> EnableInterleavedMemAccesses( + "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, + cl::desc("Enable vectorization on interleaved memory accesses in a loop")); + +/// Maximum factor for an interleaved memory access. +static cl::opt<unsigned> MaxInterleaveGroupFactor( + "max-interleave-group-factor", cl::Hidden, + cl::desc("Maximum factor for an interleaved access group (default = 8)"), + cl::init(8)); + /// We don't unroll loops with a known constant trip count below this number. static const unsigned TinyTripCountUnrollThreshold = 128; @@ -351,6 +365,9 @@ protected: /// broadcast them into a vector. VectorParts &getVectorValue(Value *V); + /// Try to vectorize the interleaved access group that \p Instr belongs to. + void vectorizeInterleaveGroup(Instruction *Instr); + /// Generate a shuffle sequence that will reverse the vector Vec. virtual Value *reverseVector(Value *Vec); @@ -545,6 +562,219 @@ static void propagateMetadata(SmallVectorImpl<Value *> &To, const Instruction *F propagateMetadata(I, From); } +/// \brief The group of interleaved loads/stores sharing the same stride and +/// close to each other. +/// +/// Each member in this group has an index starting from 0, and the largest +/// index should be less than interleaved factor, which is equal to the absolute +/// value of the access's stride. +/// +/// E.g. An interleaved load group of factor 4: +/// for (unsigned i = 0; i < 1024; i+=4) { +/// a = A[i]; // Member of index 0 +/// b = A[i+1]; // Member of index 1 +/// d = A[i+3]; // Member of index 3 +/// ... +/// } +/// +/// An interleaved store group of factor 4: +/// for (unsigned i = 0; i < 1024; i+=4) { +/// ... +/// A[i] = a; // Member of index 0 +/// A[i+1] = b; // Member of index 1 +/// A[i+2] = c; // Member of index 2 +/// A[i+3] = d; // Member of index 3 +/// } +/// +/// Note: the interleaved load group could have gaps (missing members), but +/// the interleaved store group doesn't allow gaps. 
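A source-level picture of the structure this class models, in plain C++: both loads below advance two ints per iteration, so A[2*i] is the member at index 0 and A[2*i+1] the member at index 1 of one factor-2 load group with no gaps.

// Classic interleaved pattern: stride 2, two members, no gaps.
void sumAdjacentPairs(const int *A, int *Out, int N) {
  for (int i = 0; i < N; ++i)
    Out[i] = A[2 * i] + A[2 * i + 1];
}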
+class InterleaveGroup { +public: + InterleaveGroup(Instruction *Instr, int Stride, unsigned Align) + : Align(Align), SmallestKey(0), LargestKey(0), InsertPos(Instr) { + assert(Align && "The alignment should be non-zero"); + + Factor = std::abs(Stride); + assert(Factor > 1 && "Invalid interleave factor"); + + Reverse = Stride < 0; + Members[0] = Instr; + } + + bool isReverse() const { return Reverse; } + unsigned getFactor() const { return Factor; } + unsigned getAlignment() const { return Align; } + unsigned getNumMembers() const { return Members.size(); } + + /// \brief Try to insert a new member \p Instr with index \p Index and + /// alignment \p NewAlign. The index is related to the leader and it could be + /// negative if it is the new leader. + /// + /// \returns false if the instruction doesn't belong to the group. + bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) { + assert(NewAlign && "The new member's alignment should be non-zero"); + + int Key = Index + SmallestKey; + + // Skip if there is already a member with the same index. + if (Members.count(Key)) + return false; + + if (Key > LargestKey) { + // The largest index is always less than the interleave factor. + if (Index >= static_cast<int>(Factor)) + return false; + + LargestKey = Key; + } else if (Key < SmallestKey) { + // The largest index is always less than the interleave factor. + if (LargestKey - Key >= static_cast<int>(Factor)) + return false; + + SmallestKey = Key; + } + + // It's always safe to select the minimum alignment. + Align = std::min(Align, NewAlign); + Members[Key] = Instr; + return true; + } + + /// \brief Get the member with the given index \p Index + /// + /// \returns nullptr if contains no such member. + Instruction *getMember(unsigned Index) const { + int Key = SmallestKey + Index; + if (!Members.count(Key)) + return nullptr; + + return Members.find(Key)->second; + } + + /// \brief Get the index for the given member. Unlike the key in the member + /// map, the index starts from 0. + unsigned getIndex(Instruction *Instr) const { + for (auto I : Members) + if (I.second == Instr) + return I.first - SmallestKey; + + llvm_unreachable("InterleaveGroup contains no such member"); + } + + Instruction *getInsertPos() const { return InsertPos; } + void setInsertPos(Instruction *Inst) { InsertPos = Inst; } + +private: + unsigned Factor; // Interleave Factor. + bool Reverse; + unsigned Align; + DenseMap<int, Instruction *> Members; + int SmallestKey; + int LargestKey; + + // To avoid breaking dependences, vectorized instructions of an interleave + // group should be inserted at either the first load or the last store in + // program order. + // + // E.g. %even = load i32 // Insert Position + // %add = add i32 %even // Use of %even + // %odd = load i32 + // + // store i32 %even + // %odd = add i32 // Def of %odd + // store i32 %odd // Insert Position + Instruction *InsertPos; +}; + +/// \brief Drive the analysis of interleaved memory accesses in the loop. +/// +/// Use this class to analyze interleaved accesses only when we can vectorize +/// a loop. Otherwise it's meaningless to do analysis as the vectorization +/// on interleaved accesses is unsafe. +/// +/// The analysis collects interleave groups and records the relationships +/// between the member and the group in a map. 
+class InterleavedAccessInfo { +public: + InterleavedAccessInfo(ScalarEvolution *SE, Loop *L, DominatorTree *DT) + : SE(SE), TheLoop(L), DT(DT) {} + + ~InterleavedAccessInfo() { + SmallSet<InterleaveGroup *, 4> DelSet; + // Avoid releasing a pointer twice. + for (auto &I : InterleaveGroupMap) + DelSet.insert(I.second); + for (auto *Ptr : DelSet) + delete Ptr; + } + + /// \brief Analyze the interleaved accesses and collect them in interleave + /// groups. Substitute symbolic strides using \p Strides. + void analyzeInterleaving(const ValueToValueMap &Strides); + + /// \brief Check if \p Instr belongs to any interleave group. + bool isInterleaved(Instruction *Instr) const { + return InterleaveGroupMap.count(Instr); + } + + /// \brief Get the interleave group that \p Instr belongs to. + /// + /// \returns nullptr if doesn't have such group. + InterleaveGroup *getInterleaveGroup(Instruction *Instr) const { + if (InterleaveGroupMap.count(Instr)) + return InterleaveGroupMap.find(Instr)->second; + return nullptr; + } + +private: + ScalarEvolution *SE; + Loop *TheLoop; + DominatorTree *DT; + + /// Holds the relationships between the members and the interleave group. + DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap; + + /// \brief The descriptor for a strided memory access. + struct StrideDescriptor { + StrideDescriptor(int Stride, const SCEV *Scev, unsigned Size, + unsigned Align) + : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {} + + StrideDescriptor() : Stride(0), Scev(nullptr), Size(0), Align(0) {} + + int Stride; // The access's stride. It is negative for a reverse access. + const SCEV *Scev; // The scalar expression of this access + unsigned Size; // The size of the memory object. + unsigned Align; // The alignment of this access. + }; + + /// \brief Create a new interleave group with the given instruction \p Instr, + /// stride \p Stride and alignment \p Align. + /// + /// \returns the newly created interleave group. + InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride, + unsigned Align) { + assert(!InterleaveGroupMap.count(Instr) && + "Already in an interleaved access group"); + InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align); + return InterleaveGroupMap[Instr]; + } + + /// \brief Release the group and remove all the relationships. + void releaseGroup(InterleaveGroup *Group) { + for (unsigned i = 0; i < Group->getFactor(); i++) + if (Instruction *Member = Group->getMember(i)) + InterleaveGroupMap.erase(Member); + + delete Group; + } + + /// \brief Collect all the accesses with a constant stride in program order. + void collectConstStridedAccesses( + MapVector<Instruction *, StrideDescriptor> &StrideAccesses, + const ValueToValueMap &Strides); +}; + /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and /// to what vectorization factor. /// This class does not look at the profitability of vectorization, only the @@ -565,8 +795,8 @@ public: Function *F, const TargetTransformInfo *TTI, LoopAccessAnalysis *LAA) : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F), - TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), Induction(nullptr), - WidestIndTy(nullptr), HasFunNoNaNAttr(false) {} + TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(SE, L, DT), + Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {} /// This enum represents the kinds of inductions that we support. 
enum InductionKind { @@ -697,6 +927,16 @@ public: return LAI; } + /// \brief Check if \p Instr belongs to any interleaved access group. + bool isAccessInterleaved(Instruction *Instr) { + return InterleaveInfo.isInterleaved(Instr); + } + + /// \brief Get the interleaved access group that \p Instr belongs to. + const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { + return InterleaveInfo.getInterleaveGroup(Instr); + } + unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } bool hasStride(Value *V) { return StrideSet.count(V); } @@ -792,6 +1032,10 @@ private: // null until canVectorizeMemory sets it up. const LoopAccessInfo *LAI; + /// The interleave access information contains groups of interleaved accesses + /// with the same stride and close to each other. + InterleavedAccessInfo InterleaveInfo; + // --- vectorization state --- // /// Holds the integer induction variable. This is the counter of the @@ -1657,6 +1901,251 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) { "reverse"); } +// Get a mask to interleave \p NumVec vectors into a wide vector. +// I.e. <0, VF, VF*2, ..., VF*(NumVec-1), 1, VF+1, VF*2+1, ...> +// E.g. For 2 interleaved vectors, if VF is 4, the mask is: +// <0, 4, 1, 5, 2, 6, 3, 7> +static Constant *getInterleavedMask(IRBuilder<> &Builder, unsigned VF, + unsigned NumVec) { + SmallVector<Constant *, 16> Mask; + for (unsigned i = 0; i < VF; i++) + for (unsigned j = 0; j < NumVec; j++) + Mask.push_back(Builder.getInt32(j * VF + i)); + + return ConstantVector::get(Mask); +} + +// Get the strided mask starting from index \p Start. +// I.e. <Start, Start + Stride, ..., Start + Stride*(VF-1)> +static Constant *getStridedMask(IRBuilder<> &Builder, unsigned Start, + unsigned Stride, unsigned VF) { + SmallVector<Constant *, 16> Mask; + for (unsigned i = 0; i < VF; i++) + Mask.push_back(Builder.getInt32(Start + i * Stride)); + + return ConstantVector::get(Mask); +} + +// Get a mask of two parts: The first part consists of sequential integers +// starting from 0, The second part consists of UNDEFs. +// I.e. <0, 1, 2, ..., NumInt - 1, undef, ..., undef> +static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned NumInt, + unsigned NumUndef) { + SmallVector<Constant *, 16> Mask; + for (unsigned i = 0; i < NumInt; i++) + Mask.push_back(Builder.getInt32(i)); + + Constant *Undef = UndefValue::get(Builder.getInt32Ty()); + for (unsigned i = 0; i < NumUndef; i++) + Mask.push_back(Undef); + + return ConstantVector::get(Mask); +} + +// Concatenate two vectors with the same element type. The 2nd vector should +// not have more elements than the 1st vector. If the 2nd vector has less +// elements, extend it with UNDEFs. +static Value *ConcatenateTwoVectors(IRBuilder<> &Builder, Value *V1, + Value *V2) { + VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType()); + VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType()); + assert(VecTy1 && VecTy2 && + VecTy1->getScalarType() == VecTy2->getScalarType() && + "Expect two vectors with the same element type"); + + unsigned NumElts1 = VecTy1->getNumElements(); + unsigned NumElts2 = VecTy2->getNumElements(); + assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements"); + + if (NumElts1 > NumElts2) { + // Extend with UNDEFs. 
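The mask helpers above are pure index arithmetic; computed standalone, interleavedMask(4, 2) below reproduces the <0, 4, 1, 5, 2, 6, 3, 7> example from the comment (a sketch mirroring getInterleavedMask, without the IR types):

#include <vector>

// Lane order for interleaving NumVec vectors of length VF:
// <0, VF, 2*VF, ..., 1, VF+1, ...>.
std::vector<unsigned> interleavedMask(unsigned VF, unsigned NumVec) {
  std::vector<unsigned> Mask;
  for (unsigned i = 0; i < VF; ++i)
    for (unsigned j = 0; j < NumVec; ++j)
      Mask.push_back(j * VF + i);
  return Mask;
}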
+ Constant *ExtMask = + getSequentialMask(Builder, NumElts2, NumElts1 - NumElts2); + V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask); + } + + Constant *Mask = getSequentialMask(Builder, NumElts1 + NumElts2, 0); + return Builder.CreateShuffleVector(V1, V2, Mask); +} + +// Concatenate vectors in the given list. All vectors have the same type. +static Value *ConcatenateVectors(IRBuilder<> &Builder, + ArrayRef<Value *> InputList) { + unsigned NumVec = InputList.size(); + assert(NumVec > 1 && "Should be at least two vectors"); + + SmallVector<Value *, 8> ResList; + ResList.append(InputList.begin(), InputList.end()); + do { + SmallVector<Value *, 8> TmpList; + for (unsigned i = 0; i < NumVec - 1; i += 2) { + Value *V0 = ResList[i], *V1 = ResList[i + 1]; + assert((V0->getType() == V1->getType() || i == NumVec - 2) && + "Only the last vector may have a different type"); + + TmpList.push_back(ConcatenateTwoVectors(Builder, V0, V1)); + } + + // Push the last vector if the total number of vectors is odd. + if (NumVec % 2 != 0) + TmpList.push_back(ResList[NumVec - 1]); + + ResList = TmpList; + NumVec = ResList.size(); + } while (NumVec > 1); + + return ResList[0]; +} + +// Try to vectorize the interleave group that \p Instr belongs to. +// +// E.g. Translate following interleaved load group (factor = 3): +// for (i = 0; i < N; i+=3) { +// R = Pic[i]; // Member of index 0 +// G = Pic[i+1]; // Member of index 1 +// B = Pic[i+2]; // Member of index 2 +// ... // do something to R, G, B +// } +// To: +// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B +// %R.vec = shuffle %wide.vec, undef, <0, 3, 6, 9> ; R elements +// %G.vec = shuffle %wide.vec, undef, <1, 4, 7, 10> ; G elements +// %B.vec = shuffle %wide.vec, undef, <2, 5, 8, 11> ; B elements +// +// Or translate following interleaved store group (factor = 3): +// for (i = 0; i < N; i+=3) { +// ... do something to R, G, B +// Pic[i] = R; // Member of index 0 +// Pic[i+1] = G; // Member of index 1 +// Pic[i+2] = B; // Member of index 2 +// } +// To: +// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7> +// %B_U.vec = shuffle %B.vec, undef, <0, 1, 2, 3, u, u, u, u> +// %interleaved.vec = shuffle %R_G.vec, %B_U.vec, +// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements +// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B +void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { + const InterleaveGroup *Group = Legal->getInterleavedAccessGroup(Instr); + assert(Group && "Fail to get an interleaved access group."); + + // Skip if current instruction is not the insert position. + if (Instr != Group->getInsertPos()) + return; + + LoadInst *LI = dyn_cast<LoadInst>(Instr); + StoreInst *SI = dyn_cast<StoreInst>(Instr); + Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); + + // Prepare for the vector type of the interleaved load/store. + Type *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType(); + unsigned InterleaveFactor = Group->getFactor(); + Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF); + Type *PtrTy = VecTy->getPointerTo(Ptr->getType()->getPointerAddressSpace()); + + // Prepare for the new pointers. + setDebugLocFromInst(Builder, Ptr); + VectorParts &PtrParts = getVectorValue(Ptr); + SmallVector<Value *, 2> NewPtrs; + unsigned Index = Group->getIndex(Instr); + for (unsigned Part = 0; Part < UF; Part++) { + // Extract the pointer for current instruction from the pointer vector. A + // reverse access uses the pointer in the last lane. 
+    Value *NewPtr = Builder.CreateExtractElement(
+        PtrParts[Part],
+        Group->isReverse() ? Builder.getInt32(VF - 1) : Builder.getInt32(0));
+
+    // Note that the current instruction could be at any index. We need to
+    // adjust the address to the member of index 0.
+    //
+    // E.g.  a = A[i+1];     // Member of index 1 (Current instruction)
+    //       b = A[i];       // Member of index 0
+    // The current pointer points to A[i+1]; adjust it to A[i].
+    //
+    // E.g.  A[i+1] = a;     // Member of index 1
+    //       A[i]   = b;     // Member of index 0
+    //       A[i+2] = c;     // Member of index 2 (Current instruction)
+    // The current pointer points to A[i+2]; adjust it to A[i].
+    NewPtr = Builder.CreateGEP(NewPtr, Builder.getInt32(-Index));
+
+    // Cast to the vector pointer type.
+    NewPtrs.push_back(Builder.CreateBitCast(NewPtr, PtrTy));
+  }
+
+  setDebugLocFromInst(Builder, Instr);
+  Value *UndefVec = UndefValue::get(VecTy);
+
+  // Vectorize the interleaved load group.
+  if (LI) {
+    for (unsigned Part = 0; Part < UF; Part++) {
+      Instruction *NewLoadInstr = Builder.CreateAlignedLoad(
+          NewPtrs[Part], Group->getAlignment(), "wide.vec");
+
+      for (unsigned i = 0; i < InterleaveFactor; i++) {
+        Instruction *Member = Group->getMember(i);
+
+        // Skip the gaps in the group.
+        if (!Member)
+          continue;
+
+        Constant *StrideMask = getStridedMask(Builder, i, InterleaveFactor, VF);
+        Value *StridedVec = Builder.CreateShuffleVector(
+            NewLoadInstr, UndefVec, StrideMask, "strided.vec");
+
+        // If this member has a different type, cast the result to it.
+        if (Member->getType() != ScalarTy) {
+          VectorType *OtherVTy = VectorType::get(Member->getType(), VF);
+          StridedVec = Builder.CreateBitOrPointerCast(StridedVec, OtherVTy);
+        }
+
+        VectorParts &Entry = WidenMap.get(Member);
+        Entry[Part] =
+            Group->isReverse() ? reverseVector(StridedVec) : StridedVec;
+      }
+
+      propagateMetadata(NewLoadInstr, Instr);
+    }
+    return;
+  }
+
+  // The subvector type for the current instruction.
+  VectorType *SubVT = VectorType::get(ScalarTy, VF);
+
+  // Vectorize the interleaved store group.
+  for (unsigned Part = 0; Part < UF; Part++) {
+    // Collect the stored vector from each member.
+    SmallVector<Value *, 4> StoredVecs;
+    for (unsigned i = 0; i < InterleaveFactor; i++) {
+      // An interleaved store group doesn't allow a gap, so each index has a
+      // member.
+      Instruction *Member = Group->getMember(i);
+      assert(Member && "Fail to get a member from an interleaved store group");
+
+      Value *StoredVec =
+          getVectorValue(dyn_cast<StoreInst>(Member)->getValueOperand())[Part];
+      if (Group->isReverse())
+        StoredVec = reverseVector(StoredVec);
+
+      // If this member has a different type, cast it to a unified type.
+      if (StoredVec->getType() != SubVT)
+        StoredVec = Builder.CreateBitOrPointerCast(StoredVec, SubVT);
+
+      StoredVecs.push_back(StoredVec);
+    }
+
+    // Concatenate all vectors into a wide vector.
+    Value *WideVec = ConcatenateVectors(Builder, StoredVecs);
+
+    // Interleave the elements in the wide vector.
+    Constant *IMask = getInterleavedMask(Builder, VF, InterleaveFactor);
+    Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask,
+                                              "interleaved.vec");
+
+    Instruction *NewStoreInstr =
+        Builder.CreateAlignedStore(IVec, NewPtrs[Part], Group->getAlignment());
+    propagateMetadata(NewStoreInstr, Instr);
+  }
+}
+
 void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
   // Attempt to issue a wide load. 
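The load half of the lowering above can be modelled in scalar code: member i of a factor-F group owns lanes i, i+F, i+2F, ... of the wide vector, which is exactly the strided mask <i, i+F, ..., i+(VF-1)*F>. A standalone model, not the vectorizer itself; extractMember<4, 3>(Wide, 1) picks lanes <1, 4, 7, 10>, matching the G-element example in the comment:

#include <array>

// Extract member i of a factor-F group from a wide vector of F*VF lanes.
template <unsigned VF, unsigned F>
std::array<int, VF> extractMember(const std::array<int, VF * F> &Wide,
                                  unsigned i) {
  std::array<int, VF> Member = {};
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    Member[Lane] = Wide[i + Lane * F];
  return Member;
}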
LoadInst *LI = dyn_cast<LoadInst>(Instr); @@ -1664,6 +2153,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { assert((LI || SI) && "Invalid Load/Store instruction"); + // Try to vectorize the interleave group if this access is interleaved. + if (Legal->isAccessInterleaved(Instr)) + return vectorizeInterleaveGroup(Instr); + Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType(); Type *DataTy = VectorType::get(ScalarDataTy, VF); Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); @@ -3408,6 +3901,10 @@ bool LoopVectorizationLegality::canVectorize() { "") <<"!\n"); + // Analyze interleaved memory accesses. + if (EnableInterleavedMemAccesses) + InterleaveInfo.analyzeInterleaving(Strides); + // Okay! We can vectorize. At this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with // no restrictions. @@ -3923,6 +4420,166 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB, return true; } +void InterleavedAccessInfo::collectConstStridedAccesses( + MapVector<Instruction *, StrideDescriptor> &StrideAccesses, + const ValueToValueMap &Strides) { + // Holds load/store instructions in program order. + SmallVector<Instruction *, 16> AccessList; + + for (auto *BB : TheLoop->getBlocks()) { + bool IsPred = LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); + + for (auto &I : *BB) { + if (!isa<LoadInst>(&I) && !isa<StoreInst>(&I)) + continue; + // FIXME: Currently we can't handle mixed accesses and predicated accesses + if (IsPred) + return; + + AccessList.push_back(&I); + } + } + + if (AccessList.empty()) + return; + + auto &DL = TheLoop->getHeader()->getModule()->getDataLayout(); + for (auto I : AccessList) { + LoadInst *LI = dyn_cast<LoadInst>(I); + StoreInst *SI = dyn_cast<StoreInst>(I); + + Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); + int Stride = isStridedPtr(SE, Ptr, TheLoop, Strides); + + // The factor of the corresponding interleave group. + unsigned Factor = std::abs(Stride); + + // Ignore the access if the factor is too small or too large. + if (Factor < 2 || Factor > MaxInterleaveGroupFactor) + continue; + + const SCEV *Scev = replaceSymbolicStrideSCEV(SE, Strides, Ptr); + PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); + unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType()); + + // An alignment of 0 means target ABI alignment. + unsigned Align = LI ? LI->getAlignment() : SI->getAlignment(); + if (!Align) + Align = DL.getABITypeAlignment(PtrTy->getElementType()); + + StrideAccesses[I] = StrideDescriptor(Stride, Scev, Size, Align); + } +} + +// Analyze interleaved accesses and collect them into interleave groups. +// +// Notice that the vectorization on interleaved groups will change instruction +// orders and may break dependences. But the memory dependence check guarantees +// that there is no overlap between two pointers of different strides, element +// sizes or underlying bases. +// +// For pointers sharing the same stride, element size and underlying base, no +// need to worry about Read-After-Write dependences and Write-After-Read +// dependences. +// +// E.g. The RAW dependence: A[i] = a; +// b = A[i]; +// This won't exist as it is a store-load forwarding conflict, which has +// already been checked and forbidden in the dependence check. +// +// E.g. 
The WAR dependence:  a = A[i];  // (1)
+//                      A[i] = b;  // (2)
+// The store group of (2) is always inserted at or below (2), and the load
+// group of (1) is always inserted at or above (1). The dependence is safe.
+void InterleavedAccessInfo::analyzeInterleaving(
+    const ValueToValueMap &Strides) {
+  DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
+
+  // Holds all the stride accesses.
+  MapVector<Instruction *, StrideDescriptor> StrideAccesses;
+  collectConstStridedAccesses(StrideAccesses, Strides);
+
+  if (StrideAccesses.empty())
+    return;
+
+  // Holds all interleaved store groups temporarily.
+  SmallSetVector<InterleaveGroup *, 4> StoreGroups;
+
+  // Search the load-load/write-write pair B-A in bottom-up order and try to
+  // insert B into the interleave group of A according to 3 rules:
+  //   1. A and B have the same stride.
+  //   2. A and B have the same memory object size.
+  //   3. B belongs to the group according to the distance.
+  //
+  // The bottom-up order can avoid breaking the Write-After-Write dependences
+  // between two pointers of the same base.
+  // E.g.  A[i]   = a;   (1)
+  //       A[i]   = b;   (2)
+  //       A[i+1] = c;   (3)
+  // We form the group (2)+(3) first, so (1) has to form groups with accesses
+  // above (1), which guarantees that (1) is always above (2).
+  for (auto I = StrideAccesses.rbegin(), E = StrideAccesses.rend(); I != E;
+       ++I) {
+    Instruction *A = I->first;
+    StrideDescriptor DesA = I->second;
+
+    InterleaveGroup *Group = getInterleaveGroup(A);
+    if (!Group) {
+      DEBUG(dbgs() << "LV: Creating an interleave group with:" << *A << '\n');
+      Group = createInterleaveGroup(A, DesA.Stride, DesA.Align);
+    }
+
+    if (A->mayWriteToMemory())
+      StoreGroups.insert(Group);
+
+    for (auto II = std::next(I); II != E; ++II) {
+      Instruction *B = II->first;
+      StrideDescriptor DesB = II->second;
+
+      // Ignore if B is already in a group or B is a different memory operation.
+      if (isInterleaved(B) || A->mayReadFromMemory() != B->mayReadFromMemory())
+        continue;
+
+      // Check rules 1 and 2.
+      if (DesB.Stride != DesA.Stride || DesB.Size != DesA.Size)
+        continue;
+
+      // Calculate the distance and prepare for rule 3.
+      const SCEVConstant *DistToA =
+          dyn_cast<SCEVConstant>(SE->getMinusSCEV(DesB.Scev, DesA.Scev));
+      if (!DistToA)
+        continue;
+
+      int DistanceToA = DistToA->getValue()->getValue().getSExtValue();
+
+      // Skip if the distance is not a multiple of the size, as then the two
+      // accesses are not in the same group.
+      if (DistanceToA % static_cast<int>(DesA.Size))
+        continue;
+
+      // The index of B is the index of A plus B's index relative to A.
+      int IndexB =
+          Group->getIndex(A) + DistanceToA / static_cast<int>(DesA.Size);
+
+      // Try to insert B into the group.
+      if (Group->insertMember(B, IndexB, DesB.Align)) {
+        DEBUG(dbgs() << "LV: Inserted:" << *B << '\n'
+                     << "    into the interleave group with" << *A << '\n');
+        InterleaveGroupMap[B] = Group;
+
+        // Set the first load in program order as the insert position.
+        if (B->mayReadFromMemory())
+          Group->setInsertPos(B);
+      }
+    } // Iteration on instruction B
+  } // Iteration on instruction A
+
+  // Remove interleaved store groups with gaps. 
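Rule 3 above reduces to divisibility: B can join A's group only when the constant distance between their SCEVs is a whole number of elements, and its index is then A's index shifted by that quotient. An isolated sketch of that arithmetic:

// Returns false when the byte distance is not a whole number of elements,
// i.e. B cannot be in the same interleave group as A.
bool memberIndexFor(int IndexA, long long DistBToA, int ElemSize,
                    int &IndexB) {
  if (DistBToA % ElemSize != 0)
    return false;
  IndexB = IndexA + static_cast<int>(DistBToA / ElemSize);
  return true;
}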
+  for (InterleaveGroup *Group : StoreGroups)
+    if (Group->getNumMembers() != Group->getFactor())
+      releaseGroup(Group);
+}
+
 LoopVectorizationCostModel::VectorizationFactor
 LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
   // Width 1 means no vectorize
@@ -4575,6 +5232,46 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
       return TTI.getAddressComputationCost(VectorTy) +
         TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
 
+    // For an interleaved access, calculate the total cost of the whole
+    // interleave group.
+    if (Legal->isAccessInterleaved(I)) {
+      auto Group = Legal->getInterleavedAccessGroup(I);
+      assert(Group && "Fail to get an interleaved access group.");
+
+      // Only calculate the cost once at the insert position.
+      if (Group->getInsertPos() != I)
+        return 0;
+
+      unsigned InterleaveFactor = Group->getFactor();
+      Type *WideVecTy =
+          VectorType::get(VectorTy->getVectorElementType(),
+                          VectorTy->getVectorNumElements() * InterleaveFactor);
+
+      // Holds the indices of existing members in an interleaved load group.
+      // An interleaved store group doesn't need this as it doesn't allow gaps.
+      SmallVector<unsigned, 4> Indices;
+      if (LI) {
+        for (unsigned i = 0; i < InterleaveFactor; i++)
+          if (Group->getMember(i))
+            Indices.push_back(i);
+      }
+
+      // Calculate the cost of the whole interleaved group.
+      unsigned Cost = TTI.getInterleavedMemoryOpCost(
+          I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
+          Group->getAlignment(), AS);
+
+      if (Group->isReverse())
+        Cost +=
+            Group->getNumMembers() *
+            TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
+
+      // FIXME: The interleaved load group with a huge gap could be even more
+      // expensive than scalar operations. Then we could ignore such a group
+      // and use scalar operations instead.
+      return Cost;
+    }
+
     // Scalarized loads/stores.
     int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
     bool Reverse = ConsecutiveStride < 0;
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 504425eae406..a3a45c80d850 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -317,9 +317,9 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
 /// \returns the AA location that is being accessed by the instruction.
 static AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return AA->getLocation(SI);
+    return MemoryLocation::get(SI);
   if (LoadInst *LI = dyn_cast<LoadInst>(I))
-    return AA->getLocation(LI);
+    return MemoryLocation::get(LI);
   return AliasAnalysis::Location();
 }
 
@@ -472,7 +472,7 @@ private:
 
   /// Create a new VectorizableTree entry.
   TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized) {
-    VectorizableTree.push_back(TreeEntry());
+    VectorizableTree.emplace_back();
     int idx = VectorizableTree.size() - 1;
     TreeEntry *Last = &VectorizableTree[idx];
     Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
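In outline, the costing above charges the whole group once, at its insert position: one wide memory operation plus, for reversed groups, a reverse shuffle per member, while the remaining members report zero. A toy mirror of that accounting (illustrative only; the real query goes through TargetTransformInfo):

// Every non-insert-position member returns 0 because its cost is already
// folded into the wide operation priced at the insert position.
unsigned interleaveGroupCost(bool IsInsertPos, unsigned WideMemOpCost,
                             bool Reversed, unsigned NumMembers,
                             unsigned ReverseShuffleCost) {
  if (!IsInsertPos)
    return 0;
  unsigned Cost = WideMemOpCost;
  if (Reversed)
    Cost += NumMembers * ReverseShuffleCost;
  return Cost;
}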