From 01095a5d43bbfde13731688ddcf6048ebb8b7721 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 23 Jul 2016 20:41:05 +0000 Subject: Vendor import of llvm release_39 branch r276489: https://llvm.org/svn/llvm-project/llvm/branches/release_39@276489 --- lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 408 ++++--- lib/Target/Hexagon/AsmParser/Makefile | 15 - lib/Target/Hexagon/BitTracker.cpp | 220 ++-- lib/Target/Hexagon/BitTracker.h | 10 +- lib/Target/Hexagon/CMakeLists.txt | 6 +- .../Hexagon/Disassembler/HexagonDisassembler.cpp | 7 +- lib/Target/Hexagon/Disassembler/Makefile | 16 - lib/Target/Hexagon/Hexagon.td | 27 +- lib/Target/Hexagon/HexagonAsmPrinter.cpp | 51 +- lib/Target/Hexagon/HexagonBitSimplify.cpp | 50 +- lib/Target/Hexagon/HexagonBitTracker.cpp | 75 +- lib/Target/Hexagon/HexagonBitTracker.h | 8 +- lib/Target/Hexagon/HexagonBlockRanges.cpp | 483 ++++++++ lib/Target/Hexagon/HexagonBlockRanges.h | 239 ++++ lib/Target/Hexagon/HexagonBranchRelaxation.cpp | 211 ++++ lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 48 +- lib/Target/Hexagon/HexagonCommonGEP.cpp | 14 +- lib/Target/Hexagon/HexagonCopyToCombine.cpp | 278 +++-- lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 19 +- lib/Target/Hexagon/HexagonExpandCondsets.cpp | 1062 ++++++++-------- lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 357 ------ lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 20 +- lib/Target/Hexagon/HexagonFrameLowering.cpp | 1269 ++++++++++++++++---- lib/Target/Hexagon/HexagonFrameLowering.h | 61 +- lib/Target/Hexagon/HexagonGenExtract.cpp | 3 + lib/Target/Hexagon/HexagonGenInsert.cpp | 13 +- lib/Target/Hexagon/HexagonGenMux.cpp | 21 +- lib/Target/Hexagon/HexagonGenPredicate.cpp | 28 +- lib/Target/Hexagon/HexagonHardwareLoops.cpp | 34 +- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 1237 ++++++++++--------- lib/Target/Hexagon/HexagonISelLowering.cpp | 458 +++++-- lib/Target/Hexagon/HexagonISelLowering.h | 54 +- lib/Target/Hexagon/HexagonInstrAlias.td | 192 +++ 
lib/Target/Hexagon/HexagonInstrFormats.td | 14 +- lib/Target/Hexagon/HexagonInstrFormatsV4.td | 7 +- lib/Target/Hexagon/HexagonInstrInfo.cpp | 1067 ++++++++++------ lib/Target/Hexagon/HexagonInstrInfo.h | 62 +- lib/Target/Hexagon/HexagonInstrInfo.td | 249 ++-- lib/Target/Hexagon/HexagonInstrInfoV3.td | 25 +- lib/Target/Hexagon/HexagonInstrInfoV4.td | 707 ++++++----- lib/Target/Hexagon/HexagonInstrInfoV60.td | 116 +- lib/Target/Hexagon/HexagonInstrInfoVector.td | 55 +- lib/Target/Hexagon/HexagonIntrinsics.td | 511 ++++---- lib/Target/Hexagon/HexagonIntrinsicsV4.td | 193 ++- lib/Target/Hexagon/HexagonIntrinsicsV5.td | 24 +- lib/Target/Hexagon/HexagonIntrinsicsV60.td | 4 +- lib/Target/Hexagon/HexagonMCInstLower.cpp | 66 +- lib/Target/Hexagon/HexagonMachineFunctionInfo.h | 19 +- lib/Target/Hexagon/HexagonMachineScheduler.cpp | 406 ++++++- lib/Target/Hexagon/HexagonMachineScheduler.h | 20 +- lib/Target/Hexagon/HexagonNewValueJump.cpp | 150 +-- lib/Target/Hexagon/HexagonOperands.td | 72 +- lib/Target/Hexagon/HexagonOptAddrMode.cpp | 663 ++++++++++ lib/Target/Hexagon/HexagonOptimizeSZextends.cpp | 3 + lib/Target/Hexagon/HexagonPeephole.cpp | 101 +- lib/Target/Hexagon/HexagonRDF.h | 4 +- lib/Target/Hexagon/HexagonRDFOpt.cpp | 88 +- lib/Target/Hexagon/HexagonRegisterInfo.cpp | 108 +- lib/Target/Hexagon/HexagonRegisterInfo.h | 18 +- lib/Target/Hexagon/HexagonRegisterInfo.td | 51 +- lib/Target/Hexagon/HexagonScheduleV4.td | 13 +- lib/Target/Hexagon/HexagonScheduleV55.td | 186 +-- lib/Target/Hexagon/HexagonScheduleV60.td | 11 +- lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp | 13 +- lib/Target/Hexagon/HexagonSelectionDAGInfo.h | 16 +- .../Hexagon/HexagonSplitConst32AndConst64.cpp | 63 +- lib/Target/Hexagon/HexagonSplitDouble.cpp | 24 +- lib/Target/Hexagon/HexagonStoreWidening.cpp | 3 + lib/Target/Hexagon/HexagonSubtarget.cpp | 244 ++++ lib/Target/Hexagon/HexagonSubtarget.h | 29 +- lib/Target/Hexagon/HexagonSystemInst.td | 21 + lib/Target/Hexagon/HexagonTargetMachine.cpp | 54 
+- lib/Target/Hexagon/HexagonTargetMachine.h | 2 +- lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 393 +++++- lib/Target/Hexagon/HexagonTargetObjectFile.h | 37 +- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 172 +-- lib/Target/Hexagon/HexagonVLIWPacketizer.h | 13 +- .../Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 638 ++++++++-- lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 22 +- .../MCTargetDesc/HexagonELFObjectWriter.cpp | 60 +- .../Hexagon/MCTargetDesc/HexagonFixupKinds.h | 1 + .../Hexagon/MCTargetDesc/HexagonInstPrinter.cpp | 12 +- .../Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 1 + lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h | 1 - .../Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 6 +- lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h | 5 +- .../Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 451 ++++--- .../Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp | 6 +- .../Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp | 19 +- lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp | 55 +- lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h | 18 +- .../Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp | 182 ++- .../Hexagon/MCTargetDesc/HexagonMCInstrInfo.h | 14 +- .../Hexagon/MCTargetDesc/HexagonMCShuffler.cpp | 1 - .../Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 70 +- lib/Target/Hexagon/MCTargetDesc/Makefile | 16 - lib/Target/Hexagon/Makefile | 26 - lib/Target/Hexagon/RDFCopy.cpp | 217 ++-- lib/Target/Hexagon/RDFCopy.h | 12 +- lib/Target/Hexagon/RDFDeadCode.cpp | 50 +- lib/Target/Hexagon/RDFDeadCode.h | 12 +- lib/Target/Hexagon/RDFGraph.cpp | 55 +- lib/Target/Hexagon/RDFGraph.h | 33 +- lib/Target/Hexagon/RDFLiveness.cpp | 100 +- lib/Target/Hexagon/RDFLiveness.h | 8 +- lib/Target/Hexagon/TargetInfo/Makefile | 15 - 106 files changed, 10195 insertions(+), 5002 deletions(-) delete mode 100644 lib/Target/Hexagon/AsmParser/Makefile delete mode 100644 lib/Target/Hexagon/Disassembler/Makefile create mode 100644 lib/Target/Hexagon/HexagonBlockRanges.cpp create mode 100644 
lib/Target/Hexagon/HexagonBlockRanges.h create mode 100644 lib/Target/Hexagon/HexagonBranchRelaxation.cpp delete mode 100644 lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp create mode 100644 lib/Target/Hexagon/HexagonOptAddrMode.cpp delete mode 100644 lib/Target/Hexagon/MCTargetDesc/Makefile delete mode 100644 lib/Target/Hexagon/Makefile delete mode 100644 lib/Target/Hexagon/TargetInfo/Makefile (limited to 'lib/Target/Hexagon') diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index a8622a96527c..496efbf7374b 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -13,14 +13,13 @@ #include "HexagonRegisterInfo.h" #include "HexagonTargetStreamer.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCELFStreamer.h" +#include "MCTargetDesc/HexagonMCAsmInfo.h" #include "MCTargetDesc/HexagonMCChecker.h" +#include "MCTargetDesc/HexagonMCELFStreamer.h" #include "MCTargetDesc/HexagonMCExpr.h" #include "MCTargetDesc/HexagonMCShuffler.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" -#include "MCTargetDesc/HexagonMCAsmInfo.h" #include "MCTargetDesc/HexagonShuffler.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" @@ -31,19 +30,19 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/Format.h" -#include "llvm/Support/SourceMgr.h" #include "llvm/Support/MemoryBuffer.h" +#include 
"llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; @@ -108,7 +107,7 @@ class HexagonAsmParser : public MCTargetAsmParser { void canonicalizeImmediates(MCInst &MCI); bool matchOneInstruction(MCInst &MCB, SMLoc IDLoc, OperandVector &InstOperands, uint64_t &ErrorInfo, - bool MatchingInlineAsm, bool &MustExtend); + bool MatchingInlineAsm); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -117,7 +116,7 @@ class HexagonAsmParser : public MCTargetAsmParser { unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override; void OutOfRange(SMLoc IDLoc, long long Val, long long Max); int processInstruction(MCInst &Inst, OperandVector const &Operands, - SMLoc IDLoc, bool &MustExtend); + SMLoc IDLoc); // Check if we have an assembler and, if so, set the ELF e_header flags. void chksetELFHeaderEFlags(unsigned flags) { @@ -125,6 +124,8 @@ class HexagonAsmParser : public MCTargetAsmParser { getAssembler()->setELFHeaderEFlags(flags); } + unsigned matchRegister(StringRef Name); + /// @name Auto-generated Match Functions /// { @@ -150,7 +151,6 @@ public: } } - bool mustExtend(OperandVector &Operands); bool splitIdentifier(OperandVector &Operands); bool parseOperand(OperandVector &Operands); bool parseInstruction(OperandVector &Operands); @@ -186,7 +186,6 @@ struct HexagonOperand : public MCParsedAsmOperand { struct ImmTy { const MCExpr *Val; - bool MustExtend; }; struct InstTy { @@ -243,8 +242,8 @@ public: bool CheckImmRange(int immBits, int zeroBits, bool isSigned, bool isRelocatable, bool Extendable) const { if (Kind == Immediate) { - const MCExpr *myMCExpr = getImm(); - if (Imm.MustExtend && !Extendable) + const MCExpr *myMCExpr = &HexagonMCInstrInfo::getExpr(*getImm()); + if (HexagonMCInstrInfo::mustExtend(*Imm.Val) && !Extendable) return false; int64_t Res; if (myMCExpr->evaluateAsAbsolute(Res)) { @@ -278,6 +277,7 @@ 
public: bool isf32Ext() const { return false; } bool iss32Imm() const { return CheckImmRange(32, 0, true, true, false); } + bool iss23_2Imm() const { return CheckImmRange(23, 2, true, true, false); } bool iss8Imm() const { return CheckImmRange(8, 0, true, false, false); } bool iss8Imm64() const { return CheckImmRange(8, 0, true, true, false); } bool iss7Imm() const { return CheckImmRange(7, 0, true, false, false); } @@ -347,7 +347,7 @@ public: bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); } bool isu6_2Ext() const { return CheckImmRange(6 + 26, 2, false, true, true); } bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); } - bool isu32MustExt() const { return isImm() && Imm.MustExtend; } + bool isu32MustExt() const { return isImm(); } void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); @@ -361,20 +361,17 @@ public: void addSignedImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - MCExpr const *Expr = getImm(); + HexagonMCExpr *Expr = + const_cast(cast(getImm())); int64_t Value; if (!Expr->evaluateAsAbsolute(Value)) { Inst.addOperand(MCOperand::createExpr(Expr)); return; } - int64_t Extended = SignExtend64 (Value, 32); - if ((Extended < 0) == (Value < 0)) { - Inst.addOperand(MCOperand::createExpr(Expr)); - return; - } - // Flip bit 33 to signal signed unsigned mismatch - Extended ^= 0x100000000; - Inst.addOperand(MCOperand::createImm(Extended)); + int64_t Extended = SignExtend64(Value, 32); + if ((Extended < 0) != (Value < 0)) + Expr->setSignMismatch(); + Inst.addOperand(MCOperand::createExpr(Expr)); } void addf32ExtOperands(MCInst &Inst, unsigned N) const { @@ -384,6 +381,9 @@ public: void adds32ImmOperands(MCInst &Inst, unsigned N) const { addSignedImmOperands(Inst, N); } + void adds23_2ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } void adds8ImmOperands(MCInst &Inst, unsigned N) 
const { addSignedImmOperands(Inst, N); } @@ -553,13 +553,15 @@ public: void adds4_6ImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = + dyn_cast(&HexagonMCInstrInfo::getExpr(*getImm())); Inst.addOperand(MCOperand::createImm(CE->getValue() * 64)); } void adds3_6ImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = + dyn_cast(&HexagonMCInstrInfo::getExpr(*getImm())); Inst.addOperand(MCOperand::createImm(CE->getValue() * 64)); } @@ -592,7 +594,6 @@ public: SMLoc E) { HexagonOperand *Op = new HexagonOperand(Immediate); Op->Imm.Val = Val; - Op->Imm.MustExtend = false; Op->StartLoc = S; Op->EndLoc = E; return std::unique_ptr(Op); @@ -616,9 +617,6 @@ void HexagonOperand::print(raw_ostream &OS) const { } } -/// @name Auto-generated Match Functions -static unsigned MatchRegisterName(StringRef Name); - bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) { DEBUG(dbgs() << "Bundle:"); DEBUG(MCB.dump_pretty(dbgs())); @@ -730,11 +728,10 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) { bool HexagonAsmParser::matchBundleOptions() { MCAsmParser &Parser = getParser(); - MCAsmLexer &Lexer = getLexer(); while (true) { if (!Parser.getTok().is(AsmToken::Colon)) return false; - Lexer.Lex(); + Lex(); StringRef Option = Parser.getTok().getString(); if (Option.compare_lower("endloop0") == 0) HexagonMCInstrInfo::setInnerLoop(MCB); @@ -746,7 +743,7 @@ bool HexagonAsmParser::matchBundleOptions() { HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB); else return true; - Lexer.Lex(); + Lex(); } } @@ -759,33 +756,29 @@ void HexagonAsmParser::canonicalizeImmediates(MCInst &MCI) { for (MCOperand &I : MCI) if (I.isImm()) { int64_t Value (I.getImm()); - if ((Value & 0x100000000) != (Value & 0x80000000)) { - // Detect flipped bit 
33 wrt bit 32 and signal warning - Value ^= 0x100000000; - if (WarnSignedMismatch) - Warning (MCI.getLoc(), "Signed/Unsigned mismatch"); - } - NewInst.addOperand(MCOperand::createExpr( - MCConstantExpr::create(Value, getContext()))); + NewInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create( + MCConstantExpr::create(Value, getContext()), getContext()))); } - else + else { + if (I.isExpr() && cast(I.getExpr())->signMismatch() && + WarnSignedMismatch) + Warning (MCI.getLoc(), "Signed/Unsigned mismatch"); NewInst.addOperand(I); + } MCI = NewInst; } bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc, OperandVector &InstOperands, uint64_t &ErrorInfo, - bool MatchingInlineAsm, - bool &MustExtend) { + bool MatchingInlineAsm) { // Perform matching with tablegen asmmatcher generated function int result = MatchInstructionImpl(InstOperands, MCI, ErrorInfo, MatchingInlineAsm); if (result == Match_Success) { MCI.setLoc(IDLoc); - MustExtend = mustExtend(InstOperands); canonicalizeImmediates(MCI); - result = processInstruction(MCI, InstOperands, IDLoc, MustExtend); + result = processInstruction(MCI, InstOperands, IDLoc); DEBUG(dbgs() << "Insn:"); DEBUG(MCI.dump_pretty(dbgs())); @@ -823,17 +816,6 @@ bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc, llvm_unreachable("Implement any new match types added!"); } -bool HexagonAsmParser::mustExtend(OperandVector &Operands) { - unsigned Count = 0; - for (std::unique_ptr &i : Operands) - if (i->isImm()) - if (static_cast(i.get())->Imm.MustExtend) - ++Count; - // Multiple extenders should have been filtered by iss9Ext et. al. 
- assert(Count < 2 && "Multiple extenders"); - return Count == 1; -} - bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -865,13 +847,11 @@ bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return finishBundle(IDLoc, Out); } MCInst *SubInst = new (getParser().getContext()) MCInst; - bool MustExtend = false; if (matchOneInstruction(*SubInst, IDLoc, Operands, ErrorInfo, - MatchingInlineAsm, MustExtend)) + MatchingInlineAsm)) return true; HexagonMCInstrInfo::extendIfNeeded( - getParser().getContext(), MCII, MCB, *SubInst, - HexagonMCInstrInfo::isExtended(MCII, *SubInst) || MustExtend); + getParser().getContext(), MCII, MCB, *SubInst); MCB.addOperand(MCOperand::createInst(SubInst)); if (!InBrackets) return finishBundle(IDLoc, Out); @@ -916,7 +896,8 @@ bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) { // end of the section. Only legacy hexagon-gcc created assembly code // used negative subsections. 
if ((Res < 0) && (Res > -8193)) - Subsection = MCConstantExpr::create(8192 + Res, this->getContext()); + Subsection = HexagonMCExpr::create( + MCConstantExpr::create(8192 + Res, getContext()), getContext()); getStreamer().SubSection(Subsection); return false; @@ -1110,7 +1091,7 @@ bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) { AsmToken const &Token = getParser().getTok(); StringRef String = Token.getString(); SMLoc Loc = Token.getLoc(); - getLexer().Lex(); + Lex(); do { std::pair HeadTail = String.split('.'); if (!HeadTail.first.empty()) @@ -1144,7 +1125,7 @@ bool HexagonAsmParser::parseOperand(OperandVector &Operands) { static char const *RParen = ")"; Operands.push_back(HexagonOperand::CreateToken(LParen, Begin)); Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End)); - AsmToken MaybeDotNew = Lexer.getTok(); + const AsmToken &MaybeDotNew = Lexer.getTok(); if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) && MaybeDotNew.getString().equals_lower(".new")) splitIdentifier(Operands); @@ -1160,7 +1141,7 @@ bool HexagonAsmParser::parseOperand(OperandVector &Operands) { Operands.insert(Operands.end () - 1, HexagonOperand::CreateToken(LParen, Begin)); Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End)); - AsmToken MaybeDotNew = Lexer.getTok(); + const AsmToken &MaybeDotNew = Lexer.getTok(); if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) && MaybeDotNew.getString().equals_lower(".new")) splitIdentifier(Operands); @@ -1186,7 +1167,7 @@ bool HexagonAsmParser::isLabel(AsmToken &Token) { return false; if (!Token.is(AsmToken::TokenKind::Identifier)) return true; - if (!MatchRegisterName(String.lower())) + if (!matchRegister(String.lower())) return true; (void)Second; assert(Second.is(AsmToken::Colon)); @@ -1197,7 +1178,7 @@ bool HexagonAsmParser::isLabel(AsmToken &Token) { Collapsed.end()); StringRef Whole = Collapsed; std::pair DotSplit = Whole.split('.'); - if (!MatchRegisterName(DotSplit.first.lower())) + if 
(!matchRegister(DotSplit.first.lower())) return true; return false; } @@ -1242,7 +1223,7 @@ bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &En Collapsed.end()); StringRef FullString = Collapsed; std::pair DotSplit = FullString.split('.'); - unsigned DotReg = MatchRegisterName(DotSplit.first.lower()); + unsigned DotReg = matchRegister(DotSplit.first.lower()); if (DotReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) { if (DotSplit.second.empty()) { RegNo = DotReg; @@ -1262,7 +1243,7 @@ bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &En } } std::pair ColonSplit = StringRef(FullString).split(':'); - unsigned ColonReg = MatchRegisterName(ColonSplit.first.lower()); + unsigned ColonReg = matchRegister(ColonSplit.first.lower()); if (ColonReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) { Lexer.UnLex(Lookahead.back()); Lookahead.pop_back(); @@ -1302,7 +1283,7 @@ bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) { static char const * Comma = ","; do { Tokens.emplace_back (Lexer.getTok()); - Lexer.Lex(); + Lex(); switch (Tokens.back().getKind()) { case AsmToken::TokenKind::Hash: @@ -1333,11 +1314,12 @@ bool HexagonAsmParser::parseExpressionOrOperand(OperandVector &Operands) { if (implicitExpressionLocation(Operands)) { MCAsmParser &Parser = getParser(); SMLoc Loc = Parser.getLexer().getLoc(); - std::unique_ptr Expr = - HexagonOperand::CreateImm(nullptr, Loc, Loc); - MCExpr const *& Val = Expr->Imm.Val; - Operands.push_back(std::move(Expr)); - return parseExpression(Val); + MCExpr const *Expr = nullptr; + bool Error = parseExpression(Expr); + Expr = HexagonMCExpr::create(Expr, getContext()); + if (!Error) + Operands.push_back(HexagonOperand::CreateImm(Expr, Loc, Loc)); + return Error; } return parseOperand(Operands); } @@ -1350,7 +1332,7 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { AsmToken const &Token = Parser.getTok(); switch (Token.getKind()) { case 
AsmToken::EndOfStatement: { - Lexer.Lex(); + Lex(); return false; } case AsmToken::LCurly: { @@ -1358,19 +1340,19 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { return true; Operands.push_back( HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); - Lexer.Lex(); + Lex(); return false; } case AsmToken::RCurly: { if (Operands.empty()) { Operands.push_back( HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); - Lexer.Lex(); + Lex(); } return false; } case AsmToken::Comma: { - Lexer.Lex(); + Lex(); continue; } case AsmToken::EqualEqual: @@ -1383,30 +1365,28 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { Token.getString().substr(0, 1), Token.getLoc())); Operands.push_back(HexagonOperand::CreateToken( Token.getString().substr(1, 1), Token.getLoc())); - Lexer.Lex(); + Lex(); continue; } case AsmToken::Hash: { bool MustNotExtend = false; bool ImplicitExpression = implicitExpressionLocation(Operands); - std::unique_ptr Expr = HexagonOperand::CreateImm( - nullptr, Lexer.getLoc(), Lexer.getLoc()); + SMLoc ExprLoc = Lexer.getLoc(); if (!ImplicitExpression) Operands.push_back( HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); - Lexer.Lex(); + Lex(); bool MustExtend = false; bool HiOnly = false; bool LoOnly = false; if (Lexer.is(AsmToken::Hash)) { - Lexer.Lex(); + Lex(); MustExtend = true; } else if (ImplicitExpression) MustNotExtend = true; AsmToken const &Token = Parser.getTok(); if (Token.is(AsmToken::Identifier)) { StringRef String = Token.getString(); - AsmToken IDToken = Token; if (String.lower() == "hi") { HiOnly = true; } else if (String.lower() == "lo") { @@ -1418,27 +1398,46 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { HiOnly = false; LoOnly = false; } else { - Lexer.Lex(); + Lex(); } } } - if (parseExpression(Expr->Imm.Val)) + MCExpr const *Expr = nullptr; + if (parseExpression(Expr)) return true; int64_t Value; MCContext &Context = Parser.getContext(); - 
assert(Expr->Imm.Val != nullptr); - if (Expr->Imm.Val->evaluateAsAbsolute(Value)) { + assert(Expr != nullptr); + if (Expr->evaluateAsAbsolute(Value)) { if (HiOnly) - Expr->Imm.Val = MCBinaryExpr::createLShr( - Expr->Imm.Val, MCConstantExpr::create(16, Context), Context); + Expr = MCBinaryExpr::createLShr( + Expr, MCConstantExpr::create(16, Context), Context); if (HiOnly || LoOnly) - Expr->Imm.Val = MCBinaryExpr::createAnd( - Expr->Imm.Val, MCConstantExpr::create(0xffff, Context), Context); + Expr = MCBinaryExpr::createAnd(Expr, + MCConstantExpr::create(0xffff, Context), + Context); + } else { + MCValue Value; + if (Expr->evaluateAsRelocatable(Value, nullptr, nullptr)) { + if (!Value.isAbsolute()) { + switch(Value.getAccessVariant()) { + case MCSymbolRefExpr::VariantKind::VK_TPREL: + case MCSymbolRefExpr::VariantKind::VK_DTPREL: + // Don't lazy extend these expression variants + MustNotExtend = !MustExtend; + break; + default: + break; + } + } + } } - if (MustNotExtend) - Expr->Imm.Val = HexagonNoExtendOperand::Create(Expr->Imm.Val, Context); - Expr->Imm.MustExtend = MustExtend; - Operands.push_back(std::move(Expr)); + Expr = HexagonMCExpr::create(Expr, Context); + HexagonMCInstrInfo::setMustNotExtend(*Expr, MustNotExtend); + HexagonMCInstrInfo::setMustExtend(*Expr, MustExtend); + std::unique_ptr Operand = + HexagonOperand::CreateImm(Expr, ExprLoc, ExprLoc); + Operands.push_back(std::move(Operand)); continue; } default: @@ -1524,7 +1523,7 @@ void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) { int HexagonAsmParser::processInstruction(MCInst &Inst, OperandVector const &Operands, - SMLoc IDLoc, bool &MustExtend) { + SMLoc IDLoc) { MCContext &Context = getParser().getContext(); const MCRegisterInfo *RI = getContext().getRegisterInfo(); std::string r = "r"; @@ -1536,6 +1535,18 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, default: break; + case Hexagon::A2_iconst: { + Inst.setOpcode(Hexagon::A2_addi); + MCOperand Reg = 
Inst.getOperand(0); + MCOperand S16 = Inst.getOperand(1); + HexagonMCInstrInfo::setMustNotExtend(*S16.getExpr()); + HexagonMCInstrInfo::setS23_2_reloc(*S16.getExpr()); + Inst.clear(); + Inst.addOperand(Reg); + Inst.addOperand(MCOperand::createReg(Hexagon::R0)); + Inst.addOperand(S16); + break; + } case Hexagon::M4_mpyrr_addr: case Hexagon::S4_addi_asl_ri: case Hexagon::S4_addi_lsr_ri: @@ -1555,8 +1566,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::C2_cmpgei: { MCOperand &MO = Inst.getOperand(2); - MO.setExpr(MCBinaryExpr::createSub( - MO.getExpr(), MCConstantExpr::create(1, Context), Context)); + MO.setExpr(HexagonMCExpr::create(MCBinaryExpr::createSub( + MO.getExpr(), MCConstantExpr::create(1, Context), Context), Context)); Inst.setOpcode(Hexagon::C2_cmpgti); break; } @@ -1577,49 +1588,24 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, TmpInst.addOperand(Rt); Inst = TmpInst; } else { - MO.setExpr(MCBinaryExpr::createSub( - MO.getExpr(), MCConstantExpr::create(1, Context), Context)); + MO.setExpr(HexagonMCExpr::create(MCBinaryExpr::createSub( + MO.getExpr(), MCConstantExpr::create(1, Context), Context), Context)); Inst.setOpcode(Hexagon::C2_cmpgtui); } break; } - case Hexagon::J2_loop1r: - case Hexagon::J2_loop1i: - case Hexagon::J2_loop0r: - case Hexagon::J2_loop0i: { - MCOperand &MO = Inst.getOperand(0); - // Loop has different opcodes for extended vs not extended, but we should - // not use the other opcode as it is a legacy artifact of TD files. - int64_t Value; - if (MO.getExpr()->evaluateAsAbsolute(Value)) { - // if the operand can fit within a 7:2 field - if (Value < (1 << 8) && Value >= -(1 << 8)) { - SMLoc myLoc = Operands[2]->getStartLoc(); - // # is left in startLoc in the case of ## - // If '##' found then force extension. - if (*myLoc.getPointer() == '#') { - MustExtend = true; - break; - } - } else { - // If immediate and out of 7:2 range. 
- MustExtend = true; - } - } - break; - } // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" case Hexagon::A2_tfrp: { MCOperand &MO = Inst.getOperand(1); unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); - std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + std::string R1 = r + llvm::utostr(RegPairNum + 1); StringRef Reg1(R1); - MO.setReg(MatchRegisterName(Reg1)); + MO.setReg(matchRegister(Reg1)); // Add a new operand for the second register in the pair. - std::string R2 = r + llvm::utostr_32(RegPairNum); + std::string R2 = r + llvm::utostr(RegPairNum); StringRef Reg2(R2); - Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.addOperand(MCOperand::createReg(matchRegister(Reg2))); Inst.setOpcode(Hexagon::A2_combinew); break; } @@ -1628,13 +1614,13 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::A2_tfrpf: { MCOperand &MO = Inst.getOperand(2); unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); - std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + std::string R1 = r + llvm::utostr(RegPairNum + 1); StringRef Reg1(R1); - MO.setReg(MatchRegisterName(Reg1)); + MO.setReg(matchRegister(Reg1)); // Add a new operand for the second register in the pair. - std::string R2 = r + llvm::utostr_32(RegPairNum); + std::string R2 = r + llvm::utostr(RegPairNum); StringRef Reg2(R2); - Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.addOperand(MCOperand::createReg(matchRegister(Reg2))); Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) ? 
Hexagon::C2_ccombinewt : Hexagon::C2_ccombinewf); @@ -1644,19 +1630,32 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::A2_tfrpfnew: { MCOperand &MO = Inst.getOperand(2); unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); - std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + std::string R1 = r + llvm::utostr(RegPairNum + 1); StringRef Reg1(R1); - MO.setReg(MatchRegisterName(Reg1)); + MO.setReg(matchRegister(Reg1)); // Add a new operand for the second register in the pair. - std::string R2 = r + llvm::utostr_32(RegPairNum); + std::string R2 = r + llvm::utostr(RegPairNum); StringRef Reg2(R2); - Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.addOperand(MCOperand::createReg(matchRegister(Reg2))); Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) ? Hexagon::C2_ccombinewnewt : Hexagon::C2_ccombinewnewf); break; } + // Translate a "$Vdd = $Vss" to "$Vdd = vcombine($Vs, $Vt)" + case Hexagon::HEXAGON_V6_vassignpair: { + MCOperand &MO = Inst.getOperand(1); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = v + llvm::utostr(RegPairNum + 1); + MO.setReg(MatchRegisterName(R1)); + // Add a new operand for the second register in the pair. + std::string R2 = v + llvm::utostr(RegPairNum); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(R2))); + Inst.setOpcode(Hexagon::V6_vcombine); + break; + } + // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) " case Hexagon::CONST32: case Hexagon::CONST32_Float_Real: @@ -1773,7 +1772,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, MCOperand &MO = Inst.getOperand(1); int64_t Value; int sVal = (MO.getExpr()->evaluateAsAbsolute(Value) && Value < 0) ? 
-1 : 0; - MCOperand imm(MCOperand::createExpr(MCConstantExpr::create(sVal, Context))); + MCOperand imm(MCOperand::createExpr( + HexagonMCExpr::create(MCConstantExpr::create(sVal, Context), Context))); Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, imm, MO); break; } @@ -1784,18 +1784,19 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, MCOperand &MO = Inst.getOperand(1); int64_t Value; if (MO.getExpr()->evaluateAsAbsolute(Value)) { - unsigned long long u64 = Value; - signed int s8 = (u64 >> 32) & 0xFFFFFFFF; - if (s8 < -128 || s8 > 127) + int s8 = Hi_32(Value); + if (!isInt<8>(s8)) OutOfRange(IDLoc, s8, -128); - MCOperand imm(MCOperand::createExpr( - MCConstantExpr::create(s8, Context))); // upper 32 - MCOperand imm2(MCOperand::createExpr( - MCConstantExpr::create(u64 & 0xFFFFFFFF, Context))); // lower 32 + MCOperand imm(MCOperand::createExpr(HexagonMCExpr::create( + MCConstantExpr::create(s8, Context), Context))); // upper 32 + auto Expr = HexagonMCExpr::create( + MCConstantExpr::create(Lo_32(Value), Context), Context); + HexagonMCInstrInfo::setMustExtend(*Expr, HexagonMCInstrInfo::mustExtend(*MO.getExpr())); + MCOperand imm2(MCOperand::createExpr(Expr)); // lower 32 Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, imm2); } else { - MCOperand imm(MCOperand::createExpr( - MCConstantExpr::create(0, Context))); // upper 32 + MCOperand imm(MCOperand::createExpr(HexagonMCExpr::create( + MCConstantExpr::create(0, Context), Context))); // upper 32 Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, MO); } break; @@ -1843,8 +1844,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, MCOperand &Rs = Inst.getOperand(2); MCOperand &Imm4 = Inst.getOperand(3); MCOperand &Imm6 = Inst.getOperand(4); - Imm6.setExpr(MCBinaryExpr::createSub( - Imm6.getExpr(), MCConstantExpr::create(1, Context), Context)); + Imm6.setExpr(HexagonMCExpr::create(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(1, Context), Context), Context)); 
TmpInst.setOpcode(Hexagon::S2_tableidxh); TmpInst.addOperand(Rx); TmpInst.addOperand(_dst_); @@ -1862,8 +1863,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, MCOperand &Rs = Inst.getOperand(2); MCOperand &Imm4 = Inst.getOperand(3); MCOperand &Imm6 = Inst.getOperand(4); - Imm6.setExpr(MCBinaryExpr::createSub( - Imm6.getExpr(), MCConstantExpr::create(2, Context), Context)); + Imm6.setExpr(HexagonMCExpr::create(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(2, Context), Context), Context)); TmpInst.setOpcode(Hexagon::S2_tableidxw); TmpInst.addOperand(Rx); TmpInst.addOperand(_dst_); @@ -1881,8 +1882,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, MCOperand &Rs = Inst.getOperand(2); MCOperand &Imm4 = Inst.getOperand(3); MCOperand &Imm6 = Inst.getOperand(4); - Imm6.setExpr(MCBinaryExpr::createSub( - Imm6.getExpr(), MCConstantExpr::create(3, Context), Context)); + Imm6.setExpr(HexagonMCExpr::create(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(3, Context), Context), Context)); TmpInst.setOpcode(Hexagon::S2_tableidxd); TmpInst.addOperand(Rx); TmpInst.addOperand(_dst_); @@ -1903,12 +1904,14 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, MCOperand &Rs = Inst.getOperand(1); MCOperand &Imm = Inst.getOperand(2); int64_t Value; - bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + MCExpr const &Expr = *Imm.getExpr(); + bool Absolute = Expr.evaluateAsAbsolute(Value); assert(Absolute); (void)Absolute; - if (!MustExtend) { + if (!HexagonMCInstrInfo::mustExtend(Expr)) { if (Value < 0 && Value > -256) { - Imm.setExpr(MCConstantExpr::create(Value * -1, Context)); + Imm.setExpr(HexagonMCExpr::create( + MCConstantExpr::create(Value * -1, Context), Context)); TmpInst.setOpcode(Hexagon::M2_mpysin); } else if (Value < 256 && Value >= 0) TmpInst.setOpcode(Hexagon::M2_mpysip); @@ -1941,8 +1944,10 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, TmpInst.addOperand(Rd); TmpInst.addOperand(Rs); } else { - 
Imm.setExpr(MCBinaryExpr::createSub( - Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Imm.setExpr(HexagonMCExpr::create( + MCBinaryExpr::createSub(Imm.getExpr(), + MCConstantExpr::create(1, Context), Context), + Context)); TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd); MCOperand &Rd = Inst.getOperand(0); MCOperand &Rs = Inst.getOperand(1); @@ -1965,20 +1970,22 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1]) MCInst TmpInst; unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); - std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + std::string R1 = r + llvm::utostr(RegPairNum + 1); StringRef Reg1(R1); - Rss.setReg(MatchRegisterName(Reg1)); + Rss.setReg(matchRegister(Reg1)); // Add a new operand for the second register in the pair. - std::string R2 = r + llvm::utostr_32(RegPairNum); + std::string R2 = r + llvm::utostr(RegPairNum); StringRef Reg2(R2); TmpInst.setOpcode(Hexagon::A2_combinew); TmpInst.addOperand(Rdd); TmpInst.addOperand(Rss); - TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + TmpInst.addOperand(MCOperand::createReg(matchRegister(Reg2))); Inst = TmpInst; } else { - Imm.setExpr(MCBinaryExpr::createSub( - Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Imm.setExpr(HexagonMCExpr::create( + MCBinaryExpr::createSub(Imm.getExpr(), + MCConstantExpr::create(1, Context), Context), + Context)); Inst.setOpcode(Hexagon::S2_asr_i_p_rnd); } break; @@ -1990,15 +1997,15 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, if (RegNum & 1) { // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2 Inst.setOpcode(Hexagon::A4_boundscheck_hi); std::string Name = - r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1); StringRef RegPair = Name; - Rs.setReg(MatchRegisterName(RegPair)); + Rs.setReg(matchRegister(RegPair)); } else { // raw:lo 
Inst.setOpcode(Hexagon::A4_boundscheck_lo); std::string Name = - r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum); StringRef RegPair = Name; - Rs.setReg(MatchRegisterName(RegPair)); + Rs.setReg(matchRegister(RegPair)); } break; } @@ -2009,15 +2016,15 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, if (RegNum & 1) { // Odd mapped to raw:hi Inst.setOpcode(Hexagon::A2_addsph); std::string Name = - r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1); StringRef RegPair = Name; - Rs.setReg(MatchRegisterName(RegPair)); + Rs.setReg(matchRegister(RegPair)); } else { // Even mapped raw:lo Inst.setOpcode(Hexagon::A2_addspl); std::string Name = - r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum); StringRef RegPair = Name; - Rs.setReg(MatchRegisterName(RegPair)); + Rs.setReg(matchRegister(RegPair)); } break; } @@ -2028,15 +2035,15 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, if (RegNum & 1) { // Odd mapped to sat:raw:hi Inst.setOpcode(Hexagon::M2_vrcmpys_s1_h); std::string Name = - r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1); StringRef RegPair = Name; - Rt.setReg(MatchRegisterName(RegPair)); + Rt.setReg(matchRegister(RegPair)); } else { // Even mapped sat:raw:lo Inst.setOpcode(Hexagon::M2_vrcmpys_s1_l); std::string Name = - r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum); StringRef RegPair = Name; - Rt.setReg(MatchRegisterName(RegPair)); + Rt.setReg(matchRegister(RegPair)); } break; } @@ -2050,15 +2057,15 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, if (RegNum & 1) { // Odd mapped to sat:raw:hi TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h); 
std::string Name = - r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1); StringRef RegPair = Name; - Rt.setReg(MatchRegisterName(RegPair)); + Rt.setReg(matchRegister(RegPair)); } else { // Even mapped sat:raw:lo TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l); std::string Name = - r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum); StringRef RegPair = Name; - Rt.setReg(MatchRegisterName(RegPair)); + Rt.setReg(matchRegister(RegPair)); } // Registers are in different positions TmpInst.addOperand(Rxx); @@ -2075,15 +2082,15 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, if (RegNum & 1) { // Odd mapped to rnd:sat:raw:hi Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h); std::string Name = - r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1); StringRef RegPair = Name; - Rt.setReg(MatchRegisterName(RegPair)); + Rt.setReg(matchRegister(RegPair)); } else { // Even mapped rnd:sat:raw:lo Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l); std::string Name = - r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum); StringRef RegPair = Name; - Rt.setReg(MatchRegisterName(RegPair)); + Rt.setReg(matchRegister(RegPair)); } break; } @@ -2097,8 +2104,10 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, if (Value == 0) Inst.setOpcode(Hexagon::S2_vsathub); else { - Imm.setExpr(MCBinaryExpr::createSub( - Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Imm.setExpr(HexagonMCExpr::create( + MCBinaryExpr::createSub(Imm.getExpr(), + MCConstantExpr::create(1, Context), Context), + Context)); Inst.setOpcode(Hexagon::S5_asrhub_rnd_sat); } break; @@ -2115,20 +2124,22 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, if (Value == 0) { MCInst TmpInst; unsigned int 
RegPairNum = RI->getEncodingValue(Rss.getReg()); - std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + std::string R1 = r + llvm::utostr(RegPairNum + 1); StringRef Reg1(R1); - Rss.setReg(MatchRegisterName(Reg1)); + Rss.setReg(matchRegister(Reg1)); // Add a new operand for the second register in the pair. - std::string R2 = r + llvm::utostr_32(RegPairNum); + std::string R2 = r + llvm::utostr(RegPairNum); StringRef Reg2(R2); TmpInst.setOpcode(Hexagon::A2_combinew); TmpInst.addOperand(Rdd); TmpInst.addOperand(Rss); - TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + TmpInst.addOperand(MCOperand::createReg(matchRegister(Reg2))); Inst = TmpInst; } else { - Imm.setExpr(MCBinaryExpr::createSub( - Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Imm.setExpr(HexagonMCExpr::create( + MCBinaryExpr::createSub(Imm.getExpr(), + MCConstantExpr::create(1, Context), Context), + Context)); Inst.setOpcode(Hexagon::S5_vasrhrnd); } break; @@ -2140,8 +2151,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, MCOperand &Rs = Inst.getOperand(1); TmpInst.setOpcode(Hexagon::A2_subri); TmpInst.addOperand(Rd); - TmpInst.addOperand( - MCOperand::createExpr(MCConstantExpr::create(-1, Context))); + TmpInst.addOperand(MCOperand::createExpr( + HexagonMCExpr::create(MCConstantExpr::create(-1, Context), Context))); TmpInst.addOperand(Rs); Inst = TmpInst; break; @@ -2150,3 +2161,10 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, return Match_Success; } + + +unsigned HexagonAsmParser::matchRegister(StringRef Name) { + if (unsigned Reg = MatchRegisterName(Name)) + return Reg; + return MatchRegisterAltName(Name); +} diff --git a/lib/Target/Hexagon/AsmParser/Makefile b/lib/Target/Hexagon/AsmParser/Makefile deleted file mode 100644 index 0aa0b4140c3e..000000000000 --- a/lib/Target/Hexagon/AsmParser/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/Hexagon/AsmParser/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler 
Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMHexagonAsmParser - -# Hack: we need to include 'main' Hexagon target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index ea96eb0ee10a..d052a835fbd8 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -84,87 +84,89 @@ namespace { } } -raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::BitValue &BV) { - switch (BV.Type) { - case BT::BitValue::Top: - OS << 'T'; - break; - case BT::BitValue::Zero: - OS << '0'; - break; - case BT::BitValue::One: - OS << '1'; - break; - case BT::BitValue::Ref: - OS << printv(BV.RefI.Reg) << '[' << BV.RefI.Pos << ']'; - break; +namespace llvm { + raw_ostream &operator<<(raw_ostream &OS, const BT::BitValue &BV) { + switch (BV.Type) { + case BT::BitValue::Top: + OS << 'T'; + break; + case BT::BitValue::Zero: + OS << '0'; + break; + case BT::BitValue::One: + OS << '1'; + break; + case BT::BitValue::Ref: + OS << printv(BV.RefI.Reg) << '[' << BV.RefI.Pos << ']'; + break; + } + return OS; } - return OS; -} -raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::RegisterCell &RC) { - unsigned n = RC.Bits.size(); - OS << "{ w:" << n; - // Instead of printing each bit value individually, try to group them - // into logical segments, such as sequences of 0 or 1 bits or references - // to consecutive bits (e.g. "bits 3-5 are same as bits 7-9 of reg xyz"). - // "Start" will be the index of the beginning of the most recent segment. - unsigned Start = 0; - bool SeqRef = false; // A sequence of refs to consecutive bits. - bool ConstRef = false; // A sequence of refs to the same bit. 
- - for (unsigned i = 1, n = RC.Bits.size(); i < n; ++i) { - const BT::BitValue &V = RC[i]; - const BT::BitValue &SV = RC[Start]; - bool IsRef = (V.Type == BT::BitValue::Ref); - // If the current value is the same as Start, skip to the next one. - if (!IsRef && V == SV) - continue; - if (IsRef && SV.Type == BT::BitValue::Ref && V.RefI.Reg == SV.RefI.Reg) { - if (Start+1 == i) { - SeqRef = (V.RefI.Pos == SV.RefI.Pos+1); - ConstRef = (V.RefI.Pos == SV.RefI.Pos); - } - if (SeqRef && V.RefI.Pos == SV.RefI.Pos+(i-Start)) - continue; - if (ConstRef && V.RefI.Pos == SV.RefI.Pos) + raw_ostream &operator<<(raw_ostream &OS, const BT::RegisterCell &RC) { + unsigned n = RC.Bits.size(); + OS << "{ w:" << n; + // Instead of printing each bit value individually, try to group them + // into logical segments, such as sequences of 0 or 1 bits or references + // to consecutive bits (e.g. "bits 3-5 are same as bits 7-9 of reg xyz"). + // "Start" will be the index of the beginning of the most recent segment. + unsigned Start = 0; + bool SeqRef = false; // A sequence of refs to consecutive bits. + bool ConstRef = false; // A sequence of refs to the same bit. + + for (unsigned i = 1, n = RC.Bits.size(); i < n; ++i) { + const BT::BitValue &V = RC[i]; + const BT::BitValue &SV = RC[Start]; + bool IsRef = (V.Type == BT::BitValue::Ref); + // If the current value is the same as Start, skip to the next one. + if (!IsRef && V == SV) continue; + if (IsRef && SV.Type == BT::BitValue::Ref && V.RefI.Reg == SV.RefI.Reg) { + if (Start+1 == i) { + SeqRef = (V.RefI.Pos == SV.RefI.Pos+1); + ConstRef = (V.RefI.Pos == SV.RefI.Pos); + } + if (SeqRef && V.RefI.Pos == SV.RefI.Pos+(i-Start)) + continue; + if (ConstRef && V.RefI.Pos == SV.RefI.Pos) + continue; + } + + // The current value is different. Print the previous one and reset + // the Start. 
+ OS << " [" << Start; + unsigned Count = i - Start; + if (Count == 1) { + OS << "]:" << SV; + } else { + OS << '-' << i-1 << "]:"; + if (SV.Type == BT::BitValue::Ref && SeqRef) + OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-' + << SV.RefI.Pos+(Count-1) << ']'; + else + OS << SV; + } + Start = i; + SeqRef = ConstRef = false; } - // The current value is different. Print the previous one and reset - // the Start. OS << " [" << Start; - unsigned Count = i - Start; - if (Count == 1) { - OS << "]:" << SV; + unsigned Count = n - Start; + if (n-Start == 1) { + OS << "]:" << RC[Start]; } else { - OS << '-' << i-1 << "]:"; + OS << '-' << n-1 << "]:"; + const BT::BitValue &SV = RC[Start]; if (SV.Type == BT::BitValue::Ref && SeqRef) OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-' << SV.RefI.Pos+(Count-1) << ']'; else OS << SV; } - Start = i; - SeqRef = ConstRef = false; - } + OS << " }"; - OS << " [" << Start; - unsigned Count = n - Start; - if (n-Start == 1) { - OS << "]:" << RC[Start]; - } else { - OS << '-' << n-1 << "]:"; - const BT::BitValue &SV = RC[Start]; - if (SV.Type == BT::BitValue::Ref && SeqRef) - OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-' - << SV.RefI.Pos+(Count-1) << ']'; - else - OS << SV; + return OS; } - OS << " }"; - - return OS; } BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F) @@ -420,7 +422,7 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const { BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { - APInt A = CI->getValue(); + const APInt &A = CI->getValue(); uint16_t BW = A.getBitWidth(); assert((unsigned)BW == A.getBitWidth() && "BitWidth overflow"); RegisterCell Res(BW); @@ -731,18 +733,18 @@ BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const { return BitMask(0, W-1); } - -bool BT::MachineEvaluator::evaluate(const MachineInstr *MI, - const CellMapType &Inputs, CellMapType &Outputs) const { - unsigned Opc = MI->getOpcode(); +bool 
BT::MachineEvaluator::evaluate(const MachineInstr &MI, + const CellMapType &Inputs, + CellMapType &Outputs) const { + unsigned Opc = MI.getOpcode(); switch (Opc) { case TargetOpcode::REG_SEQUENCE: { - RegisterRef RD = MI->getOperand(0); + RegisterRef RD = MI.getOperand(0); assert(RD.Sub == 0); - RegisterRef RS = MI->getOperand(1); - unsigned SS = MI->getOperand(2).getImm(); - RegisterRef RT = MI->getOperand(3); - unsigned ST = MI->getOperand(4).getImm(); + RegisterRef RS = MI.getOperand(1); + unsigned SS = MI.getOperand(2).getImm(); + RegisterRef RT = MI.getOperand(3); + unsigned ST = MI.getOperand(4).getImm(); assert(SS != ST); uint16_t W = getRegBitWidth(RD); @@ -756,8 +758,8 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr *MI, case TargetOpcode::COPY: { // COPY can transfer a smaller register into a wider one. // If that is the case, fill the remaining high bits with 0. - RegisterRef RD = MI->getOperand(0); - RegisterRef RS = MI->getOperand(1); + RegisterRef RD = MI.getOperand(0); + RegisterRef RS = MI.getOperand(1); assert(RD.Sub == 0); uint16_t WD = getRegBitWidth(RD); uint16_t WS = getRegBitWidth(RS); @@ -780,12 +782,12 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr *MI, // Main W-Z implementation. 
-void BT::visitPHI(const MachineInstr *PI) { - int ThisN = PI->getParent()->getNumber(); +void BT::visitPHI(const MachineInstr &PI) { + int ThisN = PI.getParent()->getNumber(); if (Trace) - dbgs() << "Visit FI(BB#" << ThisN << "): " << *PI; + dbgs() << "Visit FI(BB#" << ThisN << "): " << PI; - const MachineOperand &MD = PI->getOperand(0); + const MachineOperand &MD = PI.getOperand(0); assert(MD.getSubReg() == 0 && "Unexpected sub-register in definition"); RegisterRef DefRR(MD); uint16_t DefBW = ME.getRegBitWidth(DefRR); @@ -796,8 +798,8 @@ void BT::visitPHI(const MachineInstr *PI) { bool Changed = false; - for (unsigned i = 1, n = PI->getNumOperands(); i < n; i += 2) { - const MachineBasicBlock *PB = PI->getOperand(i+1).getMBB(); + for (unsigned i = 1, n = PI.getNumOperands(); i < n; i += 2) { + const MachineBasicBlock *PB = PI.getOperand(i + 1).getMBB(); int PredN = PB->getNumber(); if (Trace) dbgs() << " edge BB#" << PredN << "->BB#" << ThisN; @@ -807,7 +809,7 @@ void BT::visitPHI(const MachineInstr *PI) { continue; } - RegisterRef RU = PI->getOperand(i); + RegisterRef RU = PI.getOperand(i); RegisterCell ResC = ME.getCell(RU, Map); if (Trace) dbgs() << " input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub) @@ -824,22 +826,21 @@ void BT::visitPHI(const MachineInstr *PI) { } } - -void BT::visitNonBranch(const MachineInstr *MI) { +void BT::visitNonBranch(const MachineInstr &MI) { if (Trace) { - int ThisN = MI->getParent()->getNumber(); - dbgs() << "Visit MI(BB#" << ThisN << "): " << *MI; + int ThisN = MI.getParent()->getNumber(); + dbgs() << "Visit MI(BB#" << ThisN << "): " << MI; } - if (MI->isDebugValue()) + if (MI.isDebugValue()) return; - assert(!MI->isBranch() && "Unexpected branch instruction"); + assert(!MI.isBranch() && "Unexpected branch instruction"); CellMapType ResMap; bool Eval = ME.evaluate(MI, Map, ResMap); if (Trace && Eval) { - for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i 
= 0, n = MI.getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; RegisterRef RU(MO); @@ -857,8 +858,8 @@ void BT::visitNonBranch(const MachineInstr *MI) { // Iterate over all definitions of the instruction, and update the // cells accordingly. - for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI.getOperand(i); // Visit register defs only. if (!MO.isReg() || !MO.isDef()) continue; @@ -905,9 +906,8 @@ void BT::visitNonBranch(const MachineInstr *MI) { } } - -void BT::visitBranchesFrom(const MachineInstr *BI) { - const MachineBasicBlock &B = *BI->getParent(); +void BT::visitBranchesFrom(const MachineInstr &BI) { + const MachineBasicBlock &B = *BI.getParent(); MachineBasicBlock::const_iterator It = BI, End = B.end(); BranchTargetList Targets, BTs; bool FallsThrough = true, DefaultToAll = false; @@ -915,11 +915,11 @@ void BT::visitBranchesFrom(const MachineInstr *BI) { do { BTs.clear(); - const MachineInstr *MI = &*It; + const MachineInstr &MI = *It; if (Trace) - dbgs() << "Visit BR(BB#" << ThisN << "): " << *MI; - assert(MI->isBranch() && "Expecting branch instruction"); - InstrExec.insert(MI); + dbgs() << "Visit BR(BB#" << ThisN << "): " << MI; + assert(MI.isBranch() && "Expecting branch instruction"); + InstrExec.insert(&MI); bool Eval = ME.evaluate(MI, Map, BTs, FallsThrough); if (!Eval) { // If the evaluation failed, we will add all targets. 
Keep going in @@ -983,11 +983,11 @@ void BT::visitUsesOf(unsigned Reg) { if (!InstrExec.count(UseI)) continue; if (UseI->isPHI()) - visitPHI(UseI); + visitPHI(*UseI); else if (!UseI->isBranch()) - visitNonBranch(UseI); + visitNonBranch(*UseI); else - visitBranchesFrom(UseI); + visitBranchesFrom(*UseI); } } @@ -1084,8 +1084,8 @@ void BT::run() { MachineBasicBlock::const_iterator It = B.begin(), End = B.end(); // Visit PHI nodes first. while (It != End && It->isPHI()) { - const MachineInstr *PI = &*It++; - InstrExec.insert(PI); + const MachineInstr &PI = *It++; + InstrExec.insert(&PI); visitPHI(PI); } @@ -1098,8 +1098,8 @@ void BT::run() { // Visit non-branch instructions. while (It != End && !It->isBranch()) { - const MachineInstr *MI = &*It++; - InstrExec.insert(MI); + const MachineInstr &MI = *It++; + InstrExec.insert(&MI); visitNonBranch(MI); } // If block end has been reached, add the fall-through edge to the queue. @@ -1114,7 +1114,7 @@ void BT::run() { } else { // Handle the remaining sequence of branches. This function will update // the work queue. - visitBranchesFrom(It); + visitBranchesFrom(*It); } } // while (!FlowQ->empty()) diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h index 959c8318fd60..5b925fe696f8 100644 --- a/lib/Target/Hexagon/BitTracker.h +++ b/lib/Target/Hexagon/BitTracker.h @@ -51,9 +51,9 @@ struct BitTracker { bool reached(const MachineBasicBlock *B) const; private: - void visitPHI(const MachineInstr *PI); - void visitNonBranch(const MachineInstr *MI); - void visitBranchesFrom(const MachineInstr *BI); + void visitPHI(const MachineInstr &PI); + void visitNonBranch(const MachineInstr &MI); + void visitBranchesFrom(const MachineInstr &BI); void visitUsesOf(unsigned Reg); void reset(); @@ -417,13 +417,13 @@ struct BitTracker::MachineEvaluator { // Evaluate a non-branching machine instruction, given the cell map with // the input values. Place the results in the Outputs map. 
Return "true" // if evaluation succeeded, "false" otherwise. - virtual bool evaluate(const MachineInstr *MI, const CellMapType &Inputs, + virtual bool evaluate(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const; // Evaluate a branch, given the cell map with the input values. Fill out // a list of all possible branch targets and indicate (through a flag) // whether the branch could fall-through. Return "true" if this information // has been successfully computed, "false" otherwise. - virtual bool evaluate(const MachineInstr *BI, const CellMapType &Inputs, + virtual bool evaluate(const MachineInstr &BI, const CellMapType &Inputs, BranchTargetList &Targets, bool &FallsThru) const = 0; const TargetRegisterInfo &TRI; diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 333ca6a757aa..0e32f25f52b7 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -17,12 +17,13 @@ add_llvm_target(HexagonCodeGen HexagonAsmPrinter.cpp HexagonBitSimplify.cpp HexagonBitTracker.cpp + HexagonBlockRanges.cpp + HexagonBranchRelaxation.cpp HexagonCFGOptimizer.cpp HexagonCommonGEP.cpp HexagonCopyToCombine.cpp HexagonEarlyIfConv.cpp HexagonExpandCondsets.cpp - HexagonExpandPredSpillCode.cpp HexagonFixupHwLoops.cpp HexagonFrameLowering.cpp HexagonGenExtract.cpp @@ -37,6 +38,7 @@ add_llvm_target(HexagonCodeGen HexagonMachineScheduler.cpp HexagonMCInstLower.cpp HexagonNewValueJump.cpp + HexagonOptAddrMode.cpp HexagonOptimizeSZextends.cpp HexagonPeephole.cpp HexagonRDF.cpp @@ -55,7 +57,7 @@ add_llvm_target(HexagonCodeGen RDFDeadCode.cpp RDFGraph.cpp RDFLiveness.cpp -) + ) add_subdirectory(AsmParser) add_subdirectory(TargetInfo) diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 4a9c3413cb29..7bc08ecfcab6 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ 
b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -16,7 +16,7 @@ #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonInstPrinter.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" @@ -30,7 +30,6 @@ #include "llvm/Support/MemoryObject.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" -#include using namespace llvm; using namespace Hexagon; @@ -382,7 +381,8 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15) Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0; else if (SubregBit) - // Subreg bit should not be set for non-doublevector newvalue producers + // Hexagon PRM 10.11 New-value operands + // Nt[0] is reserved and should always be encoded as zero. return MCDisassembler::Fail; assert(Producer != Hexagon::NoRegister); MCO.setReg(Producer); @@ -1459,6 +1459,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); Op = MCOperand::createReg(operand); MI->addOperand(Op); + break; case Hexagon::V4_SA1_and1: case Hexagon::V4_SA1_dec: case Hexagon::V4_SA1_inc: diff --git a/lib/Target/Hexagon/Disassembler/Makefile b/lib/Target/Hexagon/Disassembler/Makefile deleted file mode 100644 index 16c305fe4074..000000000000 --- a/lib/Target/Hexagon/Disassembler/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===-- lib/Target/Hexagon/Disassembler/Makefile -----------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. 
-LIBRARYNAME = LLVMHexagonDisassembler - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index 5a7eb215de42..aaa0f3e9b3d3 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -47,7 +47,6 @@ def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; def UseHVXDbl : Predicate<"HST->useHVXDblOps()">, AssemblerPredicate<"ExtensionHVXDbl">; def UseHVXSgl : Predicate<"HST->useHVXSglOps()">; - def UseHVX : Predicate<"HST->useHVXSglOps() ||HST->useHVXDblOps()">, AssemblerPredicate<"ExtensionHVX">; @@ -171,6 +170,15 @@ def getBaseWithImmOffset : InstrMapping { let ValueCols = [["BaseImmOffset"]]; } +def getAbsoluteForm : InstrMapping { + let FilterClass = "AddrModeRel"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore", + "isFloat"]; + let ColFields = ["addrMode"]; + let KeyCol = ["BaseImmOffset"]; + let ValueCols = [["Absolute"]]; +} + def getBaseWithRegOffset : InstrMapping { let FilterClass = "AddrModeRel"; let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; @@ -179,6 +187,22 @@ def getBaseWithRegOffset : InstrMapping { let ValueCols = [["BaseRegOffset"]]; } +def xformRegToImmOffset : InstrMapping { + let FilterClass = "AddrModeRel"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["addrMode"]; + let KeyCol = ["BaseRegOffset"]; + let ValueCols = [["BaseImmOffset"]]; +} + +def getBaseWithLongOffset : InstrMapping { + let FilterClass = "ImmRegShl"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["addrMode"]; + let KeyCol = ["BaseRegOffset"]; + let ValueCols = [["BaseLongOffset"]]; +} + def getRegForm : InstrMapping { let FilterClass = "ImmRegRel"; let RowFields = ["CextOpcode", "PredSense", "PNewValue"]; @@ -252,6 +276,7 @@ def : 
Proc<"hexagonv60", HexagonModelV60, //===----------------------------------------------------------------------===// def HexagonAsmParser : AsmParser { + let ShouldEmitMatchRegisterAltName = 1; bit HasMnemonicFirst = 0; } diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 4c7c0392a132..cd954a146104 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -21,8 +21,6 @@ #include "MCTargetDesc/HexagonInstPrinter.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCShuffler.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -44,7 +42,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/Format.h" @@ -264,6 +261,19 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, switch (Inst.getOpcode()) { default: return; + case Hexagon::A2_iconst: { + Inst.setOpcode(Hexagon::A2_addi); + MCOperand Reg = Inst.getOperand(0); + MCOperand S16 = Inst.getOperand(1); + HexagonMCInstrInfo::setMustNotExtend(*S16.getExpr()); + HexagonMCInstrInfo::setS23_2_reloc(*S16.getExpr()); + Inst.clear(); + Inst.addOperand(Reg); + Inst.addOperand(MCOperand::createReg(Hexagon::R0)); + Inst.addOperand(S16); + break; + } + // "$dst = CONST64(#$src1)", case Hexagon::CONST64_Float_Real: case Hexagon::CONST64_Int_Real: @@ -297,8 +307,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MCOperand &Reg = MappedInst.getOperand(0); TmpInst.setOpcode(Hexagon::L2_loadrigp); TmpInst.addOperand(Reg); - TmpInst.addOperand(MCOperand::createExpr( - MCSymbolRefExpr::create(Sym, OutContext))); + TmpInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create( + 
MCSymbolRefExpr::create(Sym, OutContext), OutContext))); MappedInst = TmpInst; } break; @@ -367,7 +377,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, int64_t Imm; MCExpr const *Expr = MO.getExpr(); bool Success = Expr->evaluateAsAbsolute(Imm); - assert (Success && "Expected immediate and none was found");(void)Success; + assert (Success && "Expected immediate and none was found"); + (void)Success; MCInst TmpInst; if (Imm == 0) { TmpInst.setOpcode(Hexagon::S2_vsathub); @@ -381,7 +392,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, TmpInst.addOperand(MappedInst.getOperand(1)); const MCExpr *One = MCConstantExpr::create(1, OutContext); const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); - TmpInst.addOperand(MCOperand::createExpr(Sub)); + TmpInst.addOperand( + MCOperand::createExpr(HexagonMCExpr::create(Sub, OutContext))); MappedInst = TmpInst; return; } @@ -391,7 +403,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MCExpr const *Expr = MO2.getExpr(); int64_t Imm; bool Success = Expr->evaluateAsAbsolute(Imm); - assert (Success && "Expected immediate and none was found");(void)Success; + assert (Success && "Expected immediate and none was found"); + (void)Success; MCInst TmpInst; if (Imm == 0) { TmpInst.setOpcode(Hexagon::A2_combinew); @@ -414,7 +427,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, TmpInst.addOperand(MappedInst.getOperand(1)); const MCExpr *One = MCConstantExpr::create(1, OutContext); const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); - TmpInst.addOperand(MCOperand::createExpr(Sub)); + TmpInst.addOperand( + MCOperand::createExpr(HexagonMCExpr::create(Sub, OutContext))); MappedInst = TmpInst; return; } @@ -424,7 +438,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MCExpr const *Expr = MO.getExpr(); int64_t Imm; bool Success = Expr->evaluateAsAbsolute(Imm); - assert (Success && "Expected immediate and none was 
found");(void)Success; + assert (Success && "Expected immediate and none was found"); + (void)Success; MCInst TmpInst; if (Imm == 0) { TmpInst.setOpcode(Hexagon::A2_tfr); @@ -438,7 +453,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, TmpInst.addOperand(MappedInst.getOperand(1)); const MCExpr *One = MCConstantExpr::create(1, OutContext); const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); - TmpInst.addOperand(MCOperand::createExpr(Sub)); + TmpInst.addOperand( + MCOperand::createExpr(HexagonMCExpr::create(Sub, OutContext))); MappedInst = TmpInst; return; } @@ -470,10 +486,10 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, bool Success = MO.getExpr()->evaluateAsAbsolute(Imm); if (Success && Imm < 0) { const MCExpr *MOne = MCConstantExpr::create(-1, OutContext); - TmpInst.addOperand(MCOperand::createExpr(MOne)); + TmpInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create(MOne, OutContext))); } else { const MCExpr *Zero = MCConstantExpr::create(0, OutContext); - TmpInst.addOperand(MCOperand::createExpr(Zero)); + TmpInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create(Zero, OutContext))); } TmpInst.addOperand(MO); MappedInst = TmpInst; @@ -523,12 +539,13 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MCExpr const *Expr = Imm.getExpr(); int64_t Value; bool Success = Expr->evaluateAsAbsolute(Value); - assert(Success);(void)Success; + assert(Success); + (void)Success; if (Value < 0 && Value > -256) { MappedInst.setOpcode(Hexagon::M2_mpysin); - Imm.setExpr(MCUnaryExpr::createMinus(Expr, OutContext)); - } - else + Imm.setExpr(HexagonMCExpr::create( + MCUnaryExpr::createMinus(Expr, OutContext), OutContext)); + } else MappedInst.setOpcode(Hexagon::M2_mpysip); return; } diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp index 4d2b54521e83..c8b4a4cf9382 100644 --- a/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ 
b/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -9,18 +9,17 @@ #define DEBUG_TYPE "hexbit" -#include "llvm/CodeGen/Passes.h" +#include "HexagonBitTracker.h" +#include "HexagonTargetMachine.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "HexagonTargetMachine.h" -#include "HexagonBitTracker.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -159,8 +158,6 @@ namespace { static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses); static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W); - static bool isConst(const BitTracker::RegisterCell &RC, uint16_t B, - uint16_t W); static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B, uint16_t W); static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B, @@ -284,17 +281,6 @@ bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1, return true; } - -bool HexagonBitSimplify::isConst(const BitTracker::RegisterCell &RC, - uint16_t B, uint16_t W) { - assert(B < RC.width() && B+W <= RC.width()); - for (uint16_t i = B; i < B+W; ++i) - if (!RC[i].num()) - return false; - return true; -} - - bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC, uint16_t B, uint16_t W) { assert(B < RC.width() && B+W <= RC.width()); @@ -876,6 +862,12 @@ const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( case Hexagon::DoubleRegsRegClassID: VerifySR(RR.Sub); return &Hexagon::IntRegsRegClass; + case Hexagon::VecDblRegsRegClassID: + VerifySR(RR.Sub); + return &Hexagon::VectorRegsRegClass; + case Hexagon::VecDblRegs128BRegClassID: + 
VerifySR(RR.Sub); + return &Hexagon::VectorRegs128BRegClass; } return nullptr; } @@ -1297,7 +1289,7 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, continue; // If found, replace the instruction with a COPY. - DebugLoc DL = MI->getDebugLoc(); + const DebugLoc &DL = MI->getDebugLoc(); const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); unsigned NewR = MRI.createVirtualRegister(FRC); BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) @@ -1326,7 +1318,7 @@ namespace { : Transformation(true), HII(hii), MRI(mri), BT(bt) {} bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; private: - bool isTfrConst(const MachineInstr *MI) const; + bool isTfrConst(const MachineInstr &MI) const; bool isConst(unsigned R, int64_t &V) const; unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C, MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL); @@ -1354,9 +1346,8 @@ bool ConstGeneration::isConst(unsigned R, int64_t &C) const { return true; } - -bool ConstGeneration::isTfrConst(const MachineInstr *MI) const { - unsigned Opc = MI->getOpcode(); +bool ConstGeneration::isTfrConst(const MachineInstr &MI) const { + unsigned Opc = MI.getOpcode(); switch (Opc) { case Hexagon::A2_combineii: case Hexagon::A4_combineii: @@ -1426,7 +1417,7 @@ bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { RegisterSet Defs; for (auto I = B.begin(), E = B.end(); I != E; ++I) { - if (isTfrConst(I)) + if (isTfrConst(*I)) continue; Defs.clear(); HBS::getInstrDefs(*I, Defs); @@ -1960,11 +1951,10 @@ bool BitSimplification::genExtractHalf(MachineInstr *MI, NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR) .addReg(L.Reg, 0, L.Sub); - } else if (!L.Low && Opc != Hexagon::S2_extractu) { + } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) { NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - BuildMI(B, MI, DL, HII.get(Hexagon::S2_extractu), 
NewR) + BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR) .addReg(L.Reg, 0, L.Sub) - .addImm(16) .addImm(16); } if (NewR == 0) @@ -2187,6 +2177,9 @@ bool BitSimplification::processBlock(MachineBasicBlock &B, bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + auto &HST = MF.getSubtarget(); auto &HRI = *HST.getRegisterInfo(); auto &HII = *HST.getInstrInfo(); @@ -2729,6 +2722,9 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) { bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + auto &HST = MF.getSubtarget(); HII = HST.getInstrInfo(); HRI = HST.getRegisterInfo(); diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index d5848dc45a3b..78b57d27ad50 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -102,9 +102,9 @@ class RegisterRefs { std::vector Vector; public: - RegisterRefs(const MachineInstr *MI) : Vector(MI->getNumOperands()) { + RegisterRefs(const MachineInstr &MI) : Vector(MI.getNumOperands()) { for (unsigned i = 0, n = Vector.size(); i < n; ++i) { - const MachineOperand &MO = MI->getOperand(i); + const MachineOperand &MO = MI.getOperand(i); if (MO.isReg()) Vector[i] = BT::RegisterRef(MO); // For indices that don't correspond to registers, the entry will @@ -121,13 +121,14 @@ public: }; } -bool HexagonEvaluator::evaluate(const MachineInstr *MI, - const CellMapType &Inputs, CellMapType &Outputs) const { +bool HexagonEvaluator::evaluate(const MachineInstr &MI, + const CellMapType &Inputs, + CellMapType &Outputs) const { unsigned NumDefs = 0; // Sanity verification: there should not be any defs with subregisters. 
- for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; NumDefs++; @@ -137,7 +138,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr *MI, if (NumDefs == 0) return false; - if (MI->mayLoad()) + if (MI.mayLoad()) return evaluateLoad(MI, Inputs, Outputs); // Check COPY instructions that copy formal parameters into virtual @@ -154,7 +155,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr *MI, // was not a COPY, it would not be clear how to mirror that extension // on the callee's side. For that reason, only check COPY instructions // for potential extensions. - if (MI->isCopy()) { + if (MI.isCopy()) { if (evaluateFormalCopy(MI, Inputs, Outputs)) return true; } @@ -165,19 +166,19 @@ bool HexagonEvaluator::evaluate(const MachineInstr *MI, // checking what kind of operand a given instruction has individually // for each instruction, do it here. Global symbols as operands gene- // rally do not provide any useful information. - for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI.getOperand(i); if (MO.isGlobal() || MO.isBlockAddress() || MO.isSymbol() || MO.isJTI() || MO.isCPI()) return false; } RegisterRefs Reg(MI); - unsigned Opc = MI->getOpcode(); + unsigned Opc = MI.getOpcode(); using namespace Hexagon; - #define op(i) MI->getOperand(i) - #define rc(i) RegisterCell::ref(getCell(Reg[i],Inputs)) - #define im(i) MI->getOperand(i).getImm() +#define op(i) MI.getOperand(i) +#define rc(i) RegisterCell::ref(getCell(Reg[i], Inputs)) +#define im(i) MI.getOperand(i).getImm() // If the instruction has no register operands, skip it. 
if (Reg.size() == 0) @@ -190,9 +191,9 @@ bool HexagonEvaluator::evaluate(const MachineInstr *MI, return true; }; // Get the cell corresponding to the N-th operand. - auto cop = [this,&Reg,&MI,&Inputs] (unsigned N, uint16_t W) - -> BT::RegisterCell { - const MachineOperand &Op = MI->getOperand(N); + auto cop = [this, &Reg, &MI, &Inputs](unsigned N, + uint16_t W) -> BT::RegisterCell { + const MachineOperand &Op = MI.getOperand(N); if (Op.isImm()) return eIMM(Op.getImm(), W); if (!Op.isReg()) @@ -879,13 +880,13 @@ bool HexagonEvaluator::evaluate(const MachineInstr *MI, return false; } - -bool HexagonEvaluator::evaluate(const MachineInstr *BI, - const CellMapType &Inputs, BranchTargetList &Targets, - bool &FallsThru) const { +bool HexagonEvaluator::evaluate(const MachineInstr &BI, + const CellMapType &Inputs, + BranchTargetList &Targets, + bool &FallsThru) const { // We need to evaluate one branch at a time. TII::AnalyzeBranch checks // all the branches in a basic block at once, so we cannot use it. - unsigned Opc = BI->getOpcode(); + unsigned Opc = BI.getOpcode(); bool SimpleBranch = false; bool Negated = false; switch (Opc) { @@ -901,7 +902,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr *BI, SimpleBranch = true; break; case Hexagon::J2_jump: - Targets.insert(BI->getOperand(0).getMBB()); + Targets.insert(BI.getOperand(0).getMBB()); FallsThru = false; return true; default: @@ -914,7 +915,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr *BI, return false; // BI is a conditional branch if we got here. 
- RegisterRef PR = BI->getOperand(0); + RegisterRef PR = BI.getOperand(0); RegisterCell PC = getCell(PR, Inputs); const BT::BitValue &Test = PC[0]; @@ -929,18 +930,18 @@ bool HexagonEvaluator::evaluate(const MachineInstr *BI, return true; } - Targets.insert(BI->getOperand(1).getMBB()); + Targets.insert(BI.getOperand(1).getMBB()); FallsThru = false; return true; } - -bool HexagonEvaluator::evaluateLoad(const MachineInstr *MI, - const CellMapType &Inputs, CellMapType &Outputs) const { +bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, + const CellMapType &Inputs, + CellMapType &Outputs) const { if (TII.isPredicated(MI)) return false; - assert(MI->mayLoad() && "A load that mayn't?"); - unsigned Opc = MI->getOpcode(); + assert(MI.mayLoad() && "A load that mayn't?"); + unsigned Opc = MI.getOpcode(); uint16_t BitNum; bool SignEx; @@ -1067,7 +1068,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr *MI, break; } - const MachineOperand &MD = MI->getOperand(0); + const MachineOperand &MD = MI.getOperand(0); assert(MD.isReg() && MD.isDef()); RegisterRef RD = MD; @@ -1091,15 +1092,15 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr *MI, return true; } - -bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr *MI, - const CellMapType &Inputs, CellMapType &Outputs) const { +bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI, + const CellMapType &Inputs, + CellMapType &Outputs) const { // If MI defines a formal parameter, but is not a copy (loads are handled // in evaluateLoad), then it's not clear what to do. 
- assert(MI->isCopy()); + assert(MI.isCopy()); - RegisterRef RD = MI->getOperand(0); - RegisterRef RS = MI->getOperand(1); + RegisterRef RD = MI.getOperand(0); + RegisterRef RS = MI.getOperand(1); assert(RD.Sub == 0); if (!TargetRegisterInfo::isPhysicalRegister(RS.Reg)) return false; diff --git a/lib/Target/Hexagon/HexagonBitTracker.h b/lib/Target/Hexagon/HexagonBitTracker.h index 897af2d71870..9e7b1dbe298f 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.h +++ b/lib/Target/Hexagon/HexagonBitTracker.h @@ -26,9 +26,9 @@ struct HexagonEvaluator : public BitTracker::MachineEvaluator { HexagonEvaluator(const HexagonRegisterInfo &tri, MachineRegisterInfo &mri, const HexagonInstrInfo &tii, MachineFunction &mf); - bool evaluate(const MachineInstr *MI, const CellMapType &Inputs, + bool evaluate(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const override; - bool evaluate(const MachineInstr *BI, const CellMapType &Inputs, + bool evaluate(const MachineInstr &BI, const CellMapType &Inputs, BranchTargetList &Targets, bool &FallsThru) const override; BitTracker::BitMask mask(unsigned Reg, unsigned Sub) const override; @@ -38,9 +38,9 @@ struct HexagonEvaluator : public BitTracker::MachineEvaluator { const HexagonInstrInfo &TII; private: - bool evaluateLoad(const MachineInstr *MI, const CellMapType &Inputs, + bool evaluateLoad(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const; - bool evaluateFormalCopy(const MachineInstr *MI, const CellMapType &Inputs, + bool evaluateFormalCopy(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const; unsigned getNextPhysReg(unsigned PReg, unsigned Width) const; diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp new file mode 100644 index 000000000000..5c44029dc6e7 --- /dev/null +++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp @@ -0,0 +1,483 @@ +//===--- HexagonBlockRanges.cpp 
-------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hbr" + +#include "HexagonBlockRanges.h" +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" + +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include + +using namespace llvm; + +bool HexagonBlockRanges::IndexRange::overlaps(const IndexRange &A) const { + // If A contains start(), or "this" contains A.start(), then overlap. + IndexType S = start(), E = end(), AS = A.start(), AE = A.end(); + if (AS == S) + return true; + bool SbAE = (S < AE) || (S == AE && A.TiedEnd); // S-before-AE. + bool ASbE = (AS < E) || (AS == E && TiedEnd); // AS-before-E. + if ((AS < S && SbAE) || (S < AS && ASbE)) + return true; + // Otherwise no overlap. + return false; +} + + +bool HexagonBlockRanges::IndexRange::contains(const IndexRange &A) const { + if (start() <= A.start()) { + // Treat "None" in the range end as equal to the range start. + IndexType E = (end() != IndexType::None) ? end() : start(); + IndexType AE = (A.end() != IndexType::None) ? A.end() : A.start(); + if (AE <= E) + return true; + } + return false; +} + + +void HexagonBlockRanges::IndexRange::merge(const IndexRange &A) { + // Allow merging adjacent ranges. 
+ assert(end() == A.start() || overlaps(A)); + IndexType AS = A.start(), AE = A.end(); + if (AS < start() || start() == IndexType::None) + setStart(AS); + if (end() < AE || end() == IndexType::None) { + setEnd(AE); + TiedEnd = A.TiedEnd; + } else { + if (end() == AE) + TiedEnd |= A.TiedEnd; + } + if (A.Fixed) + Fixed = true; +} + + +void HexagonBlockRanges::RangeList::include(const RangeList &RL) { + for (auto &R : RL) + if (std::find(begin(), end(), R) == end()) + push_back(R); +} + + +// Merge all overlapping ranges in the list, so that all that remains +// is a list of disjoint ranges. +void HexagonBlockRanges::RangeList::unionize(bool MergeAdjacent) { + if (empty()) + return; + + std::sort(begin(), end()); + iterator Iter = begin(); + + while (Iter != end()-1) { + iterator Next = std::next(Iter); + // If MergeAdjacent is true, merge ranges A and B, where A.end == B.start. + // This allows merging dead ranges, but is not valid for live ranges. + bool Merge = MergeAdjacent && (Iter->end() == Next->start()); + if (Merge || Iter->overlaps(*Next)) { + Iter->merge(*Next); + erase(Next); + continue; + } + ++Iter; + } +} + + +// Compute a range A-B and add it to the list. +void HexagonBlockRanges::RangeList::addsub(const IndexRange &A, + const IndexRange &B) { + // Exclusion of non-overlapping ranges makes some checks simpler + // later in this function. + if (!A.overlaps(B)) { + // A - B = A. + add(A); + return; + } + + IndexType AS = A.start(), AE = A.end(); + IndexType BS = B.start(), BE = B.end(); + + // If AE is None, then A is included in B, since A and B overlap. + // The result of subtraction if empty, so just return. + if (AE == IndexType::None) + return; + + if (AS < BS) { + // A starts before B. + // AE cannot be None since A and B overlap. + assert(AE != IndexType::None); + // Add the part of A that extends on the "less" side of B. + add(AS, BS, A.Fixed, false); + } + + if (BE < AE) { + // BE cannot be Exit here. 
+ if (BE == IndexType::None) + add(BS, AE, A.Fixed, false); + else + add(BE, AE, A.Fixed, false); + } +} + + +// Subtract a given range from each element in the list. +void HexagonBlockRanges::RangeList::subtract(const IndexRange &Range) { + // Cannot assume that the list is unionized (i.e. contains only non- + // overlapping ranges. + RangeList T; + for (iterator Next, I = begin(); I != end(); I = Next) { + IndexRange &Rg = *I; + if (Rg.overlaps(Range)) { + T.addsub(Rg, Range); + Next = this->erase(I); + } else { + Next = std::next(I); + } + } + include(T); +} + + +HexagonBlockRanges::InstrIndexMap::InstrIndexMap(MachineBasicBlock &B) + : Block(B) { + IndexType Idx = IndexType::First; + First = Idx; + for (auto &In : B) { + if (In.isDebugValue()) + continue; + assert(getIndex(&In) == IndexType::None && "Instruction already in map"); + Map.insert(std::make_pair(Idx, &In)); + ++Idx; + } + Last = B.empty() ? IndexType::None : unsigned(Idx)-1; +} + + +MachineInstr *HexagonBlockRanges::InstrIndexMap::getInstr(IndexType Idx) const { + auto F = Map.find(Idx); + return (F != Map.end()) ? 
F->second : 0; +} + + +HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getIndex( + MachineInstr *MI) const { + for (auto &I : Map) + if (I.second == MI) + return I.first; + return IndexType::None; +} + + +HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getPrevIndex( + IndexType Idx) const { + assert (Idx != IndexType::None); + if (Idx == IndexType::Entry) + return IndexType::None; + if (Idx == IndexType::Exit) + return Last; + if (Idx == First) + return IndexType::Entry; + return unsigned(Idx)-1; +} + + +HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getNextIndex( + IndexType Idx) const { + assert (Idx != IndexType::None); + if (Idx == IndexType::Entry) + return IndexType::First; + if (Idx == IndexType::Exit || Idx == Last) + return IndexType::None; + return unsigned(Idx)+1; +} + + +void HexagonBlockRanges::InstrIndexMap::replaceInstr(MachineInstr *OldMI, + MachineInstr *NewMI) { + for (auto &I : Map) { + if (I.second != OldMI) + continue; + if (NewMI != nullptr) + I.second = NewMI; + else + Map.erase(I.first); + break; + } +} + + +HexagonBlockRanges::HexagonBlockRanges(MachineFunction &mf) + : MF(mf), HST(mf.getSubtarget()), + TII(*HST.getInstrInfo()), TRI(*HST.getRegisterInfo()), + Reserved(TRI.getReservedRegs(mf)) { + // Consider all non-allocatable registers as reserved. 
+ for (auto I = TRI.regclass_begin(), E = TRI.regclass_end(); I != E; ++I) { + auto *RC = *I; + if (RC->isAllocatable()) + continue; + for (unsigned R : *RC) + Reserved[R] = true; + } +} + + +HexagonBlockRanges::RegisterSet HexagonBlockRanges::getLiveIns( + const MachineBasicBlock &B) { + RegisterSet LiveIns; + for (auto I : B.liveins()) + if (!Reserved[I.PhysReg]) + LiveIns.insert({I.PhysReg, 0}); + return LiveIns; +} + + +HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs( + RegisterRef R, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) { + RegisterSet SRs; + + if (R.Sub != 0) { + SRs.insert(R); + return SRs; + } + + if (TargetRegisterInfo::isPhysicalRegister(R.Reg)) { + MCSubRegIterator I(R.Reg, &TRI); + if (!I.isValid()) + SRs.insert({R.Reg, 0}); + for (; I.isValid(); ++I) + SRs.insert({*I, 0}); + } else { + assert(TargetRegisterInfo::isVirtualRegister(R.Reg)); + auto &RC = *MRI.getRegClass(R.Reg); + unsigned PReg = *RC.begin(); + MCSubRegIndexIterator I(PReg, &TRI); + if (!I.isValid()) + SRs.insert({R.Reg, 0}); + for (; I.isValid(); ++I) + SRs.insert({R.Reg, I.getSubRegIndex()}); + } + return SRs; +} + + +void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, + RegToRangeMap &LiveMap) { + std::map LastDef, LastUse; + RegisterSet LiveOnEntry; + MachineBasicBlock &B = IndexMap.getBlock(); + MachineRegisterInfo &MRI = B.getParent()->getRegInfo(); + + for (auto R : getLiveIns(B)) + for (auto S : expandToSubRegs(R, MRI, TRI)) + LiveOnEntry.insert(S); + + for (auto R : LiveOnEntry) + LastDef[R] = IndexType::Entry; + + auto closeRange = [&LastUse,&LastDef,&LiveMap] (RegisterRef R) -> void { + auto LD = LastDef[R], LU = LastUse[R]; + if (LD == IndexType::None) + LD = IndexType::Entry; + if (LU == IndexType::None) + LU = IndexType::Exit; + LiveMap[R].add(LD, LU, false, false); + LastUse[R] = LastDef[R] = IndexType::None; + }; + + for (auto &In : B) { + if (In.isDebugValue()) + continue; + IndexType Index = 
IndexMap.getIndex(&In); + // Process uses first. + for (auto &Op : In.operands()) { + if (!Op.isReg() || !Op.isUse() || Op.isUndef()) + continue; + RegisterRef R = { Op.getReg(), Op.getSubReg() }; + if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg]) + continue; + bool IsKill = Op.isKill(); + for (auto S : expandToSubRegs(R, MRI, TRI)) { + LastUse[S] = Index; + if (IsKill) + closeRange(S); + } + } + // Process defs. + for (auto &Op : In.operands()) { + if (!Op.isReg() || !Op.isDef() || Op.isUndef()) + continue; + RegisterRef R = { Op.getReg(), Op.getSubReg() }; + if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg]) + continue; + for (auto S : expandToSubRegs(R, MRI, TRI)) { + if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None) + closeRange(S); + LastDef[S] = Index; + } + } + } + + // Collect live-on-exit. + RegisterSet LiveOnExit; + for (auto *SB : B.successors()) + for (auto R : getLiveIns(*SB)) + for (auto S : expandToSubRegs(R, MRI, TRI)) + LiveOnExit.insert(S); + + for (auto R : LiveOnExit) + LastUse[R] = IndexType::Exit; + + // Process remaining registers. + RegisterSet Left; + for (auto &I : LastUse) + if (I.second != IndexType::None) + Left.insert(I.first); + for (auto &I : LastDef) + if (I.second != IndexType::None) + Left.insert(I.first); + for (auto R : Left) + closeRange(R); + + // Finalize the live ranges. 
+ for (auto &P : LiveMap) + P.second.unionize(); +} + + +HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeLiveMap( + InstrIndexMap &IndexMap) { + RegToRangeMap LiveMap; + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": index map\n" << IndexMap << '\n'); + computeInitialLiveRanges(IndexMap, LiveMap); + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": live map\n" + << PrintRangeMap(LiveMap, TRI) << '\n'); + return LiveMap; +} + + +HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap( + InstrIndexMap &IndexMap, RegToRangeMap &LiveMap) { + RegToRangeMap DeadMap; + + auto addDeadRanges = [&IndexMap,&LiveMap,&DeadMap] (RegisterRef R) -> void { + auto F = LiveMap.find(R); + if (F == LiveMap.end() || F->second.empty()) { + DeadMap[R].add(IndexType::Entry, IndexType::Exit, false, false); + return; + } + + RangeList &RL = F->second; + RangeList::iterator A = RL.begin(), Z = RL.end()-1; + + // Try to create the initial range. + if (A->start() != IndexType::Entry) { + IndexType DE = IndexMap.getPrevIndex(A->start()); + if (DE != IndexType::Entry) + DeadMap[R].add(IndexType::Entry, DE, false, false); + } + + while (A != Z) { + // Creating a dead range that follows A. Pay attention to empty + // ranges (i.e. those ending with "None"). + IndexType AE = (A->end() == IndexType::None) ? A->start() : A->end(); + IndexType DS = IndexMap.getNextIndex(AE); + ++A; + IndexType DE = IndexMap.getPrevIndex(A->start()); + if (DS < DE) + DeadMap[R].add(DS, DE, false, false); + } + + // Try to create the final range. + if (Z->end() != IndexType::Exit) { + IndexType ZE = (Z->end() == IndexType::None) ? 
Z->start() : Z->end(); + IndexType DS = IndexMap.getNextIndex(ZE); + if (DS < IndexType::Exit) + DeadMap[R].add(DS, IndexType::Exit, false, false); + } + }; + + MachineFunction &MF = *IndexMap.getBlock().getParent(); + auto &MRI = MF.getRegInfo(); + unsigned NumRegs = TRI.getNumRegs(); + BitVector Visited(NumRegs); + for (unsigned R = 1; R < NumRegs; ++R) { + for (auto S : expandToSubRegs({R,0}, MRI, TRI)) { + if (Reserved[S.Reg] || Visited[S.Reg]) + continue; + addDeadRanges(S); + Visited[S.Reg] = true; + } + } + for (auto &P : LiveMap) + if (TargetRegisterInfo::isVirtualRegister(P.first.Reg)) + addDeadRanges(P.first); + + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": dead map\n" + << PrintRangeMap(DeadMap, TRI) << '\n'); + return DeadMap; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, + HexagonBlockRanges::IndexType Idx) { + if (Idx == HexagonBlockRanges::IndexType::None) + return OS << '-'; + if (Idx == HexagonBlockRanges::IndexType::Entry) + return OS << 'n'; + if (Idx == HexagonBlockRanges::IndexType::Exit) + return OS << 'x'; + return OS << unsigned(Idx)-HexagonBlockRanges::IndexType::First+1; +} + +// A mapping to translate between instructions and their indices. +raw_ostream &llvm::operator<<(raw_ostream &OS, + const HexagonBlockRanges::IndexRange &IR) { + OS << '[' << IR.start() << ':' << IR.end() << (IR.TiedEnd ? '}' : ']'); + if (IR.Fixed) + OS << '!'; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, + const HexagonBlockRanges::RangeList &RL) { + for (auto &R : RL) + OS << R << " "; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, + const HexagonBlockRanges::InstrIndexMap &M) { + for (auto &In : M.Block) { + HexagonBlockRanges::IndexType Idx = M.getIndex(&In); + OS << Idx << (Idx == M.Last ? ". 
" : " ") << In; + } + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, + const HexagonBlockRanges::PrintRangeMap &P) { + for (auto &I : P.Map) { + const HexagonBlockRanges::RangeList &RL = I.second; + OS << PrintReg(I.first.Reg, &P.TRI, I.first.Sub) << " -> " << RL << "\n"; + } + return OS; +} diff --git a/lib/Target/Hexagon/HexagonBlockRanges.h b/lib/Target/Hexagon/HexagonBlockRanges.h new file mode 100644 index 000000000000..9c3f938f99eb --- /dev/null +++ b/lib/Target/Hexagon/HexagonBlockRanges.h @@ -0,0 +1,239 @@ +//===--- HexagonBlockRanges.h ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef HEXAGON_BLOCK_RANGES_H +#define HEXAGON_BLOCK_RANGES_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/MC/MCRegisterInfo.h" // For MCPhysReg. +#include +#include +#include + +namespace llvm { + class Function; + class HexagonSubtarget; + class MachineBasicBlock; + class MachineFunction; + class MachineInstr; + class MCInstrDesc; + class raw_ostream; + class TargetInstrInfo; + class TargetRegisterClass; + class TargetRegisterInfo; + class Type; + +struct HexagonBlockRanges { + HexagonBlockRanges(MachineFunction &MF); + + struct RegisterRef { + unsigned Reg, Sub; + bool operator<(RegisterRef R) const { + return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub); + } + }; + typedef std::set RegisterSet; + + // This is to represent an "index", which is an abstraction of a position + // of an instruction within a basic block. 
+ class IndexType { + public: + enum : unsigned { + None = 0, + Entry = 1, + Exit = 2, + First = 11 // 10th + 1st + }; + static bool isInstr(IndexType X) { return X.Index >= First; } + + IndexType() : Index(None) {} + IndexType(unsigned Idx) : Index(Idx) {} + operator unsigned() const; + bool operator== (unsigned x) const; + bool operator== (IndexType Idx) const; + bool operator!= (unsigned x) const; + bool operator!= (IndexType Idx) const; + IndexType operator++ (); + bool operator< (unsigned Idx) const; + bool operator< (IndexType Idx) const; + bool operator<= (IndexType Idx) const; + + private: + bool operator> (IndexType Idx) const; + bool operator>= (IndexType Idx) const; + + unsigned Index; + }; + + // A range of indices, essentially a representation of a live range. + // This is also used to represent "dead ranges", i.e. ranges where a + // register is dead. + class IndexRange : public std::pair { + public: + IndexRange() : Fixed(false), TiedEnd(false) {} + IndexRange(IndexType Start, IndexType End, bool F = false, bool T = false) + : std::pair(Start, End), Fixed(F), TiedEnd(T) {} + IndexType start() const { return first; } + IndexType end() const { return second; } + + bool operator< (const IndexRange &A) const { + return start() < A.start(); + } + bool overlaps(const IndexRange &A) const; + bool contains(const IndexRange &A) const; + void merge(const IndexRange &A); + + bool Fixed; // Can be renamed? "Fixed" means "no". + bool TiedEnd; // The end is not a use, but a dead def tied to a use. + + private: + void setStart(const IndexType &S) { first = S; } + void setEnd(const IndexType &E) { second = E; } + }; + + // A list of index ranges. This represents liveness of a register + // in a basic block. 
+ class RangeList : public std::vector { + public: + void add(IndexType Start, IndexType End, bool Fixed, bool TiedEnd) { + push_back(IndexRange(Start, End, Fixed, TiedEnd)); + } + void add(const IndexRange &Range) { + push_back(Range); + } + void include(const RangeList &RL); + void unionize(bool MergeAdjacent = false); + void subtract(const IndexRange &Range); + + private: + void addsub(const IndexRange &A, const IndexRange &B); + }; + + class InstrIndexMap { + public: + InstrIndexMap(MachineBasicBlock &B); + MachineInstr *getInstr(IndexType Idx) const; + IndexType getIndex(MachineInstr *MI) const; + MachineBasicBlock &getBlock() const { return Block; } + IndexType getPrevIndex(IndexType Idx) const; + IndexType getNextIndex(IndexType Idx) const; + void replaceInstr(MachineInstr *OldMI, MachineInstr *NewMI); + + friend raw_ostream &operator<< (raw_ostream &OS, const InstrIndexMap &Map); + IndexType First, Last; + + private: + MachineBasicBlock &Block; + std::map Map; + }; + + typedef std::map RegToRangeMap; + RegToRangeMap computeLiveMap(InstrIndexMap &IndexMap); + RegToRangeMap computeDeadMap(InstrIndexMap &IndexMap, RegToRangeMap &LiveMap); + static RegisterSet expandToSubRegs(RegisterRef R, + const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI); + + struct PrintRangeMap { + PrintRangeMap(const RegToRangeMap &M, const TargetRegisterInfo &I) + : Map(M), TRI(I) {} + + friend raw_ostream &operator<< (raw_ostream &OS, const PrintRangeMap &P); + private: + const RegToRangeMap ⤅ + const TargetRegisterInfo &TRI; + }; + +private: + RegisterSet getLiveIns(const MachineBasicBlock &B); + + void computeInitialLiveRanges(InstrIndexMap &IndexMap, + RegToRangeMap &LiveMap); + + MachineFunction &MF; + const HexagonSubtarget &HST; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; + BitVector Reserved; +}; + + +inline HexagonBlockRanges::IndexType::operator unsigned() const { + assert(Index >= First); + return Index; +} + +inline bool 
HexagonBlockRanges::IndexType::operator== (unsigned x) const { + return Index == x; +} + +inline bool HexagonBlockRanges::IndexType::operator== (IndexType Idx) const { + return Index == Idx.Index; +} + +inline bool HexagonBlockRanges::IndexType::operator!= (unsigned x) const { + return Index != x; +} + +inline bool HexagonBlockRanges::IndexType::operator!= (IndexType Idx) const { + return Index != Idx.Index; +} + +inline +HexagonBlockRanges::IndexType HexagonBlockRanges::IndexType::operator++ () { + assert(Index != None); + assert(Index != Exit); + if (Index == Entry) + Index = First; + else + ++Index; + return *this; +} + +inline bool HexagonBlockRanges::IndexType::operator< (unsigned Idx) const { + return operator< (IndexType(Idx)); +} + +inline bool HexagonBlockRanges::IndexType::operator< (IndexType Idx) const { + // !(x < x). + if (Index == Idx.Index) + return false; + // !(None < x) for all x. + // !(x < None) for all x. + if (Index == None || Idx.Index == None) + return false; + // !(Exit < x) for all x. + // !(x < Entry) for all x. + if (Index == Exit || Idx.Index == Entry) + return false; + // Entry < x for all x != Entry. + // x < Exit for all x != Exit. 
+ if (Index == Entry || Idx.Index == Exit) + return true; + + return Index < Idx.Index; +} + +inline bool HexagonBlockRanges::IndexType::operator<= (IndexType Idx) const { + return operator==(Idx) || operator<(Idx); +} + + +raw_ostream &operator<< (raw_ostream &OS, HexagonBlockRanges::IndexType Idx); +raw_ostream &operator<< (raw_ostream &OS, + const HexagonBlockRanges::IndexRange &IR); +raw_ostream &operator<< (raw_ostream &OS, + const HexagonBlockRanges::RangeList &RL); +raw_ostream &operator<< (raw_ostream &OS, + const HexagonBlockRanges::InstrIndexMap &M); +raw_ostream &operator<< (raw_ostream &OS, + const HexagonBlockRanges::PrintRangeMap &P); + +} // namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp new file mode 100644 index 000000000000..f042baf1ef05 --- /dev/null +++ b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp @@ -0,0 +1,211 @@ +//===--- HexagonBranchRelaxation.cpp - Identify and relax long jumps ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-brelax" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// Since we have no exact knowledge of code layout, allow some safety buffer +// for jump target. This is measured in bytes. 
+static cl::opt BranchRelaxSafetyBuffer("branch-relax-safety-buffer", + cl::init(200), cl::Hidden, cl::ZeroOrMore, cl::desc("safety buffer size")); + +namespace llvm { + FunctionPass *createHexagonBranchRelaxation(); + void initializeHexagonBranchRelaxationPass(PassRegistry&); +} + +namespace { + struct HexagonBranchRelaxation : public MachineFunctionPass { + public: + static char ID; + HexagonBranchRelaxation() : MachineFunctionPass(ID) { + initializeHexagonBranchRelaxationPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Hexagon Branch Relaxation"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + + bool relaxBranches(MachineFunction &MF); + void computeOffset(MachineFunction &MF, + DenseMap &BlockToInstOffset); + bool reGenerateBranch(MachineFunction &MF, + DenseMap &BlockToInstOffset); + bool isJumpOutOfRange(MachineInstr &MI, + DenseMap &BlockToInstOffset); + }; + + char HexagonBranchRelaxation::ID = 0; +} // end anonymous namespace + +INITIALIZE_PASS(HexagonBranchRelaxation, "hexagon-brelax", + "Hexagon Branch Relaxation", false, false) + +FunctionPass *llvm::createHexagonBranchRelaxation() { + return new HexagonBranchRelaxation(); +} + + +bool HexagonBranchRelaxation::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "****** Hexagon Branch Relaxation ******\n"); + + auto &HST = MF.getSubtarget(); + HII = HST.getInstrInfo(); + HRI = HST.getRegisterInfo(); + + bool Changed = false; + Changed = relaxBranches(MF); + return Changed; +} + + +void HexagonBranchRelaxation::computeOffset(MachineFunction &MF, + DenseMap &OffsetMap) { + // offset of the current instruction from the start. 
+ unsigned InstOffset = 0; + for (auto &B : MF) { + if (B.getAlignment()) { + // Although we don't know the exact layout of the final code, we need + // to account for alignment padding somehow. This heuristic pads each + // aligned basic block according to the alignment value. + int ByteAlign = (1u << B.getAlignment()) - 1; + InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign); + } + OffsetMap[&B] = InstOffset; + for (auto &MI : B.instrs()) + InstOffset += HII->getSize(&MI); + } +} + + +/// relaxBranches - For Hexagon, if the jump target/loop label is too far from +/// the jump/loop instruction then, we need to make sure that we have constant +/// extenders set for jumps and loops. + +/// There are six iterations in this phase. It's self explanatory below. +bool HexagonBranchRelaxation::relaxBranches(MachineFunction &MF) { + // Compute the offset of each basic block + // offset of the current instruction from the start. + // map for each instruction to the beginning of the function + DenseMap BlockToInstOffset; + computeOffset(MF, BlockToInstOffset); + + return reGenerateBranch(MF, BlockToInstOffset); +} + + +/// Check if a given instruction is: +/// - a jump to a distant target +/// - that exceeds its immediate range +/// If both conditions are true, it requires constant extension. +bool HexagonBranchRelaxation::isJumpOutOfRange(MachineInstr &MI, + DenseMap &BlockToInstOffset) { + MachineBasicBlock &B = *MI.getParent(); + auto FirstTerm = B.getFirstInstrTerminator(); + if (FirstTerm == B.instr_end()) + return false; + + unsigned InstOffset = BlockToInstOffset[&B]; + unsigned Distance = 0; + + // To save time, estimate exact position of a branch instruction + // as one at the end of the MBB. + // Number of instructions times typical instruction size. + InstOffset += HII->nonDbgBBSize(&B) * HEXAGON_INSTR_SIZE; + + MachineBasicBlock *TBB = NULL, *FBB = NULL; + SmallVector Cond; + + // Try to analyze this branch. 
+ if (HII->analyzeBranch(B, TBB, FBB, Cond, false)) { + // Could not analyze it. See if this is something we can recognize. + // If it is a NVJ, it should always have its target in + // a fixed location. + if (HII->isNewValueJump(&*FirstTerm)) + TBB = FirstTerm->getOperand(HII->getCExtOpNum(&*FirstTerm)).getMBB(); + } + if (TBB && &MI == &*FirstTerm) { + Distance = std::abs((long long)InstOffset - BlockToInstOffset[TBB]) + + BranchRelaxSafetyBuffer; + return !HII->isJumpWithinBranchRange(&*FirstTerm, Distance); + } + if (FBB) { + // Look for second terminator. + auto SecondTerm = std::next(FirstTerm); + assert(SecondTerm != B.instr_end() && + (SecondTerm->isBranch() || SecondTerm->isCall()) && + "Bad second terminator"); + if (&MI != &*SecondTerm) + return false; + // Analyze the second branch in the BB. + Distance = std::abs((long long)InstOffset - BlockToInstOffset[FBB]) + + BranchRelaxSafetyBuffer; + return !HII->isJumpWithinBranchRange(&*SecondTerm, Distance); + } + return false; +} + + +bool HexagonBranchRelaxation::reGenerateBranch(MachineFunction &MF, + DenseMap &BlockToInstOffset) { + bool Changed = false; + + for (auto &B : MF) { + for (auto &MI : B) { + if (!MI.isBranch() || !isJumpOutOfRange(MI, BlockToInstOffset)) + continue; + DEBUG(dbgs() << "Long distance jump. isExtendable(" + << HII->isExtendable(&MI) << ") isConstExtended(" + << HII->isConstExtended(&MI) << ") " << MI); + + // Since we have not merged HW loops relaxation into + // this code (yet), soften our approach for the moment. + if (!HII->isExtendable(&MI) && !HII->isExtended(&MI)) { + DEBUG(dbgs() << "\tUnderimplemented relax branch instruction.\n"); + } else { + // Find which operand is expandable. + int ExtOpNum = HII->getCExtOpNum(&MI); + MachineOperand &MO = MI.getOperand(ExtOpNum); + // This need to be something we understand. So far we assume all + // branches have only MBB address as expandable field. + // If it changes, this will need to be expanded. 
+ assert(MO.isMBB() && "Branch with unknown expandable field type"); + // Mark given operand as extended. + MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); + Changed = true; + } + } + } + return Changed; +} diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index efafdd007289..559bdfb16a6f 100644 --- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -16,7 +16,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" @@ -38,9 +37,9 @@ namespace { class HexagonCFGOptimizer : public MachineFunctionPass { private: - void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*); + void InvertAndChangeJumpTarget(MachineInstr &, MachineBasicBlock *); - public: +public: static char ID; HexagonCFGOptimizer() : MachineFunctionPass(ID) { initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry()); @@ -50,6 +49,10 @@ private: return "Hexagon CFG Optimizer"; } bool runOnMachineFunction(MachineFunction &Fn) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } }; @@ -65,14 +68,12 @@ static bool IsUnconditionalJump(int Opc) { return (Opc == Hexagon::J2_jump); } - -void -HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, - MachineBasicBlock* NewTarget) { +void HexagonCFGOptimizer::InvertAndChangeJumpTarget( + MachineInstr &MI, MachineBasicBlock *NewTarget) { const TargetInstrInfo *TII = - MI->getParent()->getParent()->getSubtarget().getInstrInfo(); + MI.getParent()->getParent()->getSubtarget().getInstrInfo(); int NewOpcode = 0; - switch(MI->getOpcode()) { + switch (MI.getOpcode()) { case Hexagon::J2_jumpt: NewOpcode = 
Hexagon::J2_jumpf; break; @@ -93,12 +94,15 @@ HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, llvm_unreachable("Cannot handle this case"); } - MI->setDesc(TII->get(NewOpcode)); - MI->getOperand(1).setMBB(NewTarget); + MI.setDesc(TII->get(NewOpcode)); + MI.getOperand(1).setMBB(NewTarget); } bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { + if (skipFunction(*Fn.getFunction())) + return false; + // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); MBBb != MBBe; ++MBBb) { @@ -107,8 +111,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // Traverse the basic block. MachineBasicBlock::iterator MII = MBB->getFirstTerminator(); if (MII != MBB->end()) { - MachineInstr *MI = MII; - int Opc = MI->getOpcode(); + MachineInstr &MI = *MII; + int Opc = MI.getOpcode(); if (IsConditionalBranch(Opc)) { // @@ -160,9 +164,9 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // The target of the unconditional branch must be JumpAroundTarget. // TODO: If not, we should not invert the unconditional branch. MachineBasicBlock* CondBranchTarget = nullptr; - if ((MI->getOpcode() == Hexagon::J2_jumpt) || - (MI->getOpcode() == Hexagon::J2_jumpf)) { - CondBranchTarget = MI->getOperand(1).getMBB(); + if (MI.getOpcode() == Hexagon::J2_jumpt || + MI.getOpcode() == Hexagon::J2_jumpf) { + CondBranchTarget = MI.getOperand(1).getMBB(); } if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) { @@ -174,6 +178,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // Ensure that BB2 has one instruction -- an unconditional jump. if ((LayoutSucc->size() == 1) && IsUnconditionalJump(LayoutSucc->front().getOpcode())) { + assert(JumpAroundTarget && "jump target is needed to process second basic block"); MachineBasicBlock* UncondTarget = LayoutSucc->front().getOperand(0).getMBB(); // Check if the layout successor of BB2 is BB3. 
@@ -232,15 +237,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // Public Constructor Functions //===----------------------------------------------------------------------===// -static void initializePassOnce(PassRegistry &Registry) { - PassInfo *PI = new PassInfo("Hexagon CFG Optimizer", "hexagon-cfg", - &HexagonCFGOptimizer::ID, nullptr, false, false); - Registry.registerPass(*PI, true); -} - -void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializePassOnce) -} +INITIALIZE_PASS(HexagonCFGOptimizer, "hexagon-cfg", "Hexagon CFG Optimizer", + false, false) FunctionPass *llvm::createHexagonCFGOptimizer() { return new HexagonCFGOptimizer(); diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp index 931db6687bf8..b612b11aed50 100644 --- a/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -90,8 +90,8 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); FunctionPass::getAnalysisUsage(AU); @@ -147,7 +147,7 @@ char HexagonCommonGEP::ID = 0; INITIALIZE_PASS_BEGIN(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP", false, false) @@ -212,7 +212,6 @@ namespace { if (Comma) OS << ','; OS << "used"; - Comma = true; } OS << "} "; if (GN.Flags & GepNode::Root) @@ -1268,6 +1267,9 @@ void HexagonCommonGEP::removeDeadCode() { bool HexagonCommonGEP::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + // For now bail out on C++ exception handling. 
for (Function::iterator A = F.begin(), Z = F.end(); A != Z; ++A) for (BasicBlock::iterator I = A->begin(), E = A->end(); I != E; ++I) @@ -1276,7 +1278,7 @@ bool HexagonCommonGEP::runOnFunction(Function &F) { Fn = &F; DT = &getAnalysis().getDomTree(); - PDT = &getAnalysis(); + PDT = &getAnalysis().getPostDomTree(); LI = &getAnalysis().getLoopInfo(); Ctx = &F.getContext(); @@ -1295,7 +1297,7 @@ bool HexagonCommonGEP::runOnFunction(Function &F) { materialize(Loc); removeDeadCode(); -#ifdef XDEBUG +#ifdef EXPENSIVE_CHECKS // Run this only when expensive checks are enabled. verifyFunction(F); #endif diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 9fd863f6e153..face0f3f64b4 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -42,6 +42,11 @@ cl::opt IsCombinesDisabled("disable-merge-into-combines", cl::init(false), cl::desc("Disable merging into combines")); static +cl::opt IsConst64Disabled("disable-const64", + cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Disable generation of const64")); +static cl::opt MaxNumOfInstsBetweenNewValueStoreAndTFR("max-num-inst-between-tfr-and-nv-store", cl::Hidden, cl::init(4), @@ -62,6 +67,8 @@ class HexagonCopyToCombine : public MachineFunctionPass { bool ShouldCombineAggressively; DenseSet PotentiallyNewifiableTFR; + SmallVector DbgMItoMove; + public: static char ID; @@ -79,15 +86,22 @@ public: bool runOnMachineFunction(MachineFunction &Fn) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + private: - MachineInstr *findPairable(MachineInstr *I1, bool &DoInsertAtI1); + MachineInstr *findPairable(MachineInstr &I1, bool &DoInsertAtI1, + bool AllowC64); void findPotentialNewifiableTFRs(MachineBasicBlock &); - void combine(MachineInstr *I1, MachineInstr *I2, - 
MachineBasicBlock::iterator &MI, bool DoInsertAtI1); + void combine(MachineInstr &I1, MachineInstr &I2, + MachineBasicBlock::iterator &MI, bool DoInsertAtI1, + bool OptForSize); - bool isSafeToMoveTogether(MachineInstr *I1, MachineInstr *I2, + bool isSafeToMoveTogether(MachineInstr &I1, MachineInstr &I2, unsigned I1DestReg, unsigned I2DestReg, bool &DoInsertAtI1); @@ -102,6 +116,9 @@ private: void emitCombineII(MachineBasicBlock::iterator &Before, unsigned DestReg, MachineOperand &HiOperand, MachineOperand &LoOperand); + + void emitConst64(MachineBasicBlock::iterator &Before, unsigned DestReg, + MachineOperand &HiOperand, MachineOperand &LoOperand); }; } // End anonymous namespace. @@ -111,14 +128,13 @@ char HexagonCopyToCombine::ID = 0; INITIALIZE_PASS(HexagonCopyToCombine, "hexagon-copy-combine", "Hexagon Copy-To-Combine Pass", false, false) -static bool isCombinableInstType(MachineInstr *MI, - const HexagonInstrInfo *TII, +static bool isCombinableInstType(MachineInstr &MI, const HexagonInstrInfo *TII, bool ShouldCombineAggressively) { - switch(MI->getOpcode()) { + switch (MI.getOpcode()) { case Hexagon::A2_tfr: { // A COPY instruction can be combined if its arguments are IntRegs (32bit). - const MachineOperand &Op0 = MI->getOperand(0); - const MachineOperand &Op1 = MI->getOperand(1); + const MachineOperand &Op0 = MI.getOperand(0); + const MachineOperand &Op1 = MI.getOperand(1); assert(Op0.isReg() && Op1.isReg()); unsigned DestReg = Op0.getReg(); @@ -130,8 +146,8 @@ static bool isCombinableInstType(MachineInstr *MI, case Hexagon::A2_tfrsi: { // A transfer-immediate can be combined if its argument is a signed 8bit // value. 
- const MachineOperand &Op0 = MI->getOperand(0); - const MachineOperand &Op1 = MI->getOperand(1); + const MachineOperand &Op0 = MI.getOperand(0); + const MachineOperand &Op1 = MI.getOperand(1); assert(Op0.isReg()); unsigned DestReg = Op0.getReg(); @@ -154,11 +170,10 @@ static bool isCombinableInstType(MachineInstr *MI, return false; } -template -static bool isGreaterThanNBitTFRI(const MachineInstr *I) { - if (I->getOpcode() == Hexagon::TFRI64_V4 || - I->getOpcode() == Hexagon::A2_tfrsi) { - const MachineOperand &Op = I->getOperand(1); +template static bool isGreaterThanNBitTFRI(const MachineInstr &I) { + if (I.getOpcode() == Hexagon::TFRI64_V4 || + I.getOpcode() == Hexagon::A2_tfrsi) { + const MachineOperand &Op = I.getOperand(1); return !Op.isImm() || !isInt(Op.getImm()); } return false; @@ -167,19 +182,34 @@ static bool isGreaterThanNBitTFRI(const MachineInstr *I) { /// areCombinableOperations - Returns true if the two instruction can be merge /// into a combine (ignoring register constraints). static bool areCombinableOperations(const TargetRegisterInfo *TRI, - MachineInstr *HighRegInst, - MachineInstr *LowRegInst) { - unsigned HiOpc = HighRegInst->getOpcode(); - unsigned LoOpc = LowRegInst->getOpcode(); + MachineInstr &HighRegInst, + MachineInstr &LowRegInst, bool AllowC64) { + unsigned HiOpc = HighRegInst.getOpcode(); + unsigned LoOpc = LowRegInst.getOpcode(); (void)HiOpc; // Fix compiler warning (void)LoOpc; // Fix compiler warning assert((HiOpc == Hexagon::A2_tfr || HiOpc == Hexagon::A2_tfrsi) && (LoOpc == Hexagon::A2_tfr || LoOpc == Hexagon::A2_tfrsi) && "Assume individual instructions are of a combinable type"); - // There is no combine of two constant extended values. + if (!AllowC64) { + // There is no combine of two constant extended values. 
+ if (isGreaterThanNBitTFRI<8>(HighRegInst) && + isGreaterThanNBitTFRI<6>(LowRegInst)) + return false; + } + + // There is a combine of two constant extended values into CONST64, + // provided both constants are true immediates. + if (isGreaterThanNBitTFRI<16>(HighRegInst) && + isGreaterThanNBitTFRI<16>(LowRegInst)) + return (HighRegInst.getOperand(1).isImm() && + LowRegInst.getOperand(1).isImm()); + + // There is no combine of two constant extended values, unless handled above + // Make both 8-bit size checks to allow both combine (#,##) and combine(##,#) if (isGreaterThanNBitTFRI<8>(HighRegInst) && - isGreaterThanNBitTFRI<6>(LowRegInst)) + isGreaterThanNBitTFRI<8>(LowRegInst)) return false; return true; @@ -191,25 +221,23 @@ static bool isEvenReg(unsigned Reg) { return (Reg - Hexagon::R0) % 2 == 0; } -static void removeKillInfo(MachineInstr *MI, unsigned RegNotKilled) { - for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { - MachineOperand &Op = MI->getOperand(I); +static void removeKillInfo(MachineInstr &MI, unsigned RegNotKilled) { + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + MachineOperand &Op = MI.getOperand(I); if (!Op.isReg() || Op.getReg() != RegNotKilled || !Op.isKill()) continue; Op.setIsKill(false); } } -/// isUnsafeToMoveAcross - Returns true if it is unsafe to move a copy -/// instruction from \p UseReg to \p DestReg over the instruction \p I. -static bool isUnsafeToMoveAcross(MachineInstr *I, unsigned UseReg, - unsigned DestReg, - const TargetRegisterInfo *TRI) { - return (UseReg && (I->modifiesRegister(UseReg, TRI))) || - I->modifiesRegister(DestReg, TRI) || - I->readsRegister(DestReg, TRI) || - I->hasUnmodeledSideEffects() || - I->isInlineAsm() || I->isDebugValue(); +/// Returns true if it is unsafe to move a copy instruction from \p UseReg to +/// \p DestReg over the instruction \p MI. 
+static bool isUnsafeToMoveAcross(MachineInstr &MI, unsigned UseReg, + unsigned DestReg, + const TargetRegisterInfo *TRI) { + return (UseReg && (MI.modifiesRegister(UseReg, TRI))) || + MI.modifiesRegister(DestReg, TRI) || MI.readsRegister(DestReg, TRI) || + MI.hasUnmodeledSideEffects() || MI.isInlineAsm() || MI.isDebugValue(); } static unsigned UseReg(const MachineOperand& MO) { @@ -218,16 +246,16 @@ static unsigned UseReg(const MachineOperand& MO) { /// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2 such /// that the two instructions can be paired in a combine. -bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, - MachineInstr *I2, +bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1, + MachineInstr &I2, unsigned I1DestReg, unsigned I2DestReg, bool &DoInsertAtI1) { - unsigned I2UseReg = UseReg(I2->getOperand(1)); + unsigned I2UseReg = UseReg(I2.getOperand(1)); // It is not safe to move I1 and I2 into one combine if I2 has a true // dependence on I1. - if (I2UseReg && I1->modifiesRegister(I2UseReg, TRI)) + if (I2UseReg && I1.modifiesRegister(I2UseReg, TRI)) return false; bool isSafe = true; @@ -246,7 +274,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, // uses I2's use reg we need to modify that (first) instruction to now kill // this reg. unsigned KilledOperand = 0; - if (I2->killsRegister(I2UseReg)) + if (I2.killsRegister(I2UseReg)) KilledOperand = I2UseReg; MachineInstr *KillingInstr = nullptr; @@ -257,7 +285,10 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, // * reads I2's def reg // * or has unmodelled side effects // we can't move I2 across it. 
- if (isUnsafeToMoveAcross(&*I, I2UseReg, I2DestReg, TRI)) { + if (I->isDebugValue()) + continue; + + if (isUnsafeToMoveAcross(*I, I2UseReg, I2DestReg, TRI)) { isSafe = false; break; } @@ -287,7 +318,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, // At O3 we got better results (dhrystone) by being more conservative here. if (!ShouldCombineAggressively) End = std::next(MachineBasicBlock::iterator(I2)); - unsigned I1UseReg = UseReg(I1->getOperand(1)); + unsigned I1UseReg = UseReg(I1.getOperand(1)); // Track killed operands. If we move across an instruction that kills our // operand, we need to update the kill information on the moved I1. It kills // the operand now. @@ -295,7 +326,8 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, unsigned KilledOperand = 0; while(++I != End) { - // If the intervening instruction I: + MachineInstr &MI = *I; + // If the intervening instruction MI: // * modifies I1's use reg // * modifies I1's def reg // * reads I1's def reg @@ -304,30 +336,36 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, // kill flag for a register (a removeRegisterKilled() analogous to // addRegisterKilled) that handles aliased register correctly. // * or has a killed aliased register use of I1's use reg - // %D4 = TFRI64 16 - // %R6 = TFR %R9 + // %D4 = A2_tfrpi 16 + // %R6 = A2_tfr %R9 // %R8 = KILL %R8, %D4 // If we want to move R6 = across the KILL instruction we would have // to remove the %D4 operand. For now, we are // conservative and disallow the move. // we can't move I1 across it. - if (isUnsafeToMoveAcross(I, I1UseReg, I1DestReg, TRI) || + if (MI.isDebugValue()) { + if (MI.readsRegister(I1DestReg, TRI)) // Move this instruction after I2. + DbgMItoMove.push_back(&MI); + continue; + } + + if (isUnsafeToMoveAcross(MI, I1UseReg, I1DestReg, TRI) || // Check for an aliased register kill. Bail out if we see one. 
- (!I->killsRegister(I1UseReg) && I->killsRegister(I1UseReg, TRI))) + (!MI.killsRegister(I1UseReg) && MI.killsRegister(I1UseReg, TRI))) return false; // Check for an exact kill (registers match). - if (I1UseReg && I->killsRegister(I1UseReg)) { + if (I1UseReg && MI.killsRegister(I1UseReg)) { assert(!KillingInstr && "Should only see one killing instruction"); KilledOperand = I1UseReg; - KillingInstr = &*I; + KillingInstr = &MI; } } if (KillingInstr) { - removeKillInfo(KillingInstr, KilledOperand); + removeKillInfo(*KillingInstr, KilledOperand); // Update I1 to set the kill flag. This flag will later be picked up by // the new COMBINE instruction. - bool Added = I1->addRegisterKilled(KilledOperand, TRI); + bool Added = I1.addRegisterKilled(KilledOperand, TRI); (void)Added; // suppress compiler warning assert(Added && "Must successfully update kill flag"); } @@ -342,14 +380,16 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, void HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { DenseMap LastDef; - for (MachineBasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { - MachineInstr *MI = I; + for (MachineInstr &MI : BB) { + if (MI.isDebugValue()) + continue; + // Mark TFRs that feed a potential new value store as such. - if(TII->mayBeNewStore(MI)) { + if (TII->mayBeNewStore(&MI)) { // Look for uses of TFR instructions. - for (unsigned OpdIdx = 0, OpdE = MI->getNumOperands(); OpdIdx != OpdE; + for (unsigned OpdIdx = 0, OpdE = MI.getNumOperands(); OpdIdx != OpdE; ++OpdIdx) { - MachineOperand &Op = MI->getOperand(OpdIdx); + MachineOperand &Op = MI.getOperand(OpdIdx); // Skip over anything except register uses. 
if (!Op.isReg() || !Op.isUse() || !Op.getReg()) @@ -360,14 +400,18 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { MachineInstr *DefInst = LastDef[Reg]; if (!DefInst) continue; - if (!isCombinableInstType(DefInst, TII, ShouldCombineAggressively)) + if (!isCombinableInstType(*DefInst, TII, ShouldCombineAggressively)) continue; // Only close newifiable stores should influence the decision. + // Ignore the debug instructions in between. MachineBasicBlock::iterator It(DefInst); unsigned NumInstsToDef = 0; - while (&*It++ != MI) - ++NumInstsToDef; + while (&*It != &MI) { + if (!It->isDebugValue()) + ++NumInstsToDef; + ++It; + } if (NumInstsToDef > MaxNumOfInstsBetweenNewValueStoreAndTFR) continue; @@ -380,17 +424,17 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { // Put instructions that last defined integer or double registers into the // map. - for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { - MachineOperand &Op = MI->getOperand(I); + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + MachineOperand &Op = MI.getOperand(I); if (!Op.isReg() || !Op.isDef() || !Op.getReg()) continue; unsigned Reg = Op.getReg(); if (Hexagon::DoubleRegsRegClass.contains(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - LastDef[*SubRegs] = MI; + LastDef[*SubRegs] = &MI; } } else if (Hexagon::IntRegsRegClass.contains(Reg)) - LastDef[Reg] = MI; + LastDef[Reg] = &MI; } } } @@ -405,6 +449,9 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); + const Function *F = MF.getFunction(); + bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize); + // Combine aggressively (for code size) ShouldCombineAggressively = MF.getTarget().getOptLevel() <= CodeGenOpt::Default; @@ -418,11 +465,15 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { // Traverse 
instructions in basic block. for(MachineBasicBlock::iterator MI = BI->begin(), End = BI->end(); MI != End;) { - MachineInstr *I1 = MI++; + MachineInstr &I1 = *MI++; + + if (I1.isDebugValue()) + continue; + // Don't combine a TFR whose user could be newified (instructions that // define double registers can not be newified - Programmer's Ref Manual // 5.4.2 New-value stores). - if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I1)) + if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(&I1)) continue; // Ignore instructions that are not combinable. @@ -430,12 +481,14 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { continue; // Find a second instruction that can be merged into a combine - // instruction. + // instruction. In addition, also find all the debug instructions that + // need to be moved along with it. bool DoInsertAtI1 = false; - MachineInstr *I2 = findPairable(I1, DoInsertAtI1); + DbgMItoMove.clear(); + MachineInstr *I2 = findPairable(I1, DoInsertAtI1, OptForSize); if (I2) { HasChanged = true; - combine(I1, I2, MI, DoInsertAtI1); + combine(I1, *I2, MI, DoInsertAtI1, OptForSize); } } } @@ -447,23 +500,28 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { /// COMBINE instruction or 0 if no such instruction can be found. Returns true /// in \p DoInsertAtI1 if the combine must be inserted at instruction \p I1 /// false if the combine must be inserted at the returned instruction. 
-MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr *I1, - bool &DoInsertAtI1) { +MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1, + bool &DoInsertAtI1, + bool AllowC64) { MachineBasicBlock::iterator I2 = std::next(MachineBasicBlock::iterator(I1)); - unsigned I1DestReg = I1->getOperand(0).getReg(); - for (MachineBasicBlock::iterator End = I1->getParent()->end(); I2 != End; + while (I2->isDebugValue()) + ++I2; + + unsigned I1DestReg = I1.getOperand(0).getReg(); + + for (MachineBasicBlock::iterator End = I1.getParent()->end(); I2 != End; ++I2) { // Bail out early if we see a second definition of I1DestReg. if (I2->modifiesRegister(I1DestReg, TRI)) break; // Ignore non-combinable instructions. - if (!isCombinableInstType(I2, TII, ShouldCombineAggressively)) + if (!isCombinableInstType(*I2, TII, ShouldCombineAggressively)) continue; // Don't combine a TFR whose user could be newified. - if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(I2)) + if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(&*I2)) continue; unsigned I2DestReg = I2->getOperand(0).getReg(); @@ -478,15 +536,14 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr *I1, // Check that the two instructions are combinable. V4 allows more // instructions to be merged into a combine. - // The order matters because in a TFRI we might can encode a int8 as the - // hi reg operand but only a uint6 as the low reg operand. - if ((IsI2LowReg && !areCombinableOperations(TRI, I1, I2)) || - (IsI1LowReg && !areCombinableOperations(TRI, I2, I1))) + // The order matters because in a A2_tfrsi we might can encode a int8 as + // the hi reg operand but only a uint6 as the low reg operand. 
+ if ((IsI2LowReg && !areCombinableOperations(TRI, I1, *I2, AllowC64)) || + (IsI1LowReg && !areCombinableOperations(TRI, *I2, I1, AllowC64))) break; - if (isSafeToMoveTogether(I1, I2, I1DestReg, I2DestReg, - DoInsertAtI1)) - return I2; + if (isSafeToMoveTogether(I1, *I2, I1DestReg, I2DestReg, DoInsertAtI1)) + return &*I2; // Not safe. Stop searching. break; @@ -494,16 +551,17 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr *I1, return nullptr; } -void HexagonCopyToCombine::combine(MachineInstr *I1, MachineInstr *I2, +void HexagonCopyToCombine::combine(MachineInstr &I1, MachineInstr &I2, MachineBasicBlock::iterator &MI, - bool DoInsertAtI1) { + bool DoInsertAtI1, bool OptForSize) { // We are going to delete I2. If MI points to I2 advance it to the next // instruction. - if ((MachineInstr *)MI == I2) ++MI; + if (MI == I2.getIterator()) + ++MI; // Figure out whether I1 or I2 goes into the lowreg part. - unsigned I1DestReg = I1->getOperand(0).getReg(); - unsigned I2DestReg = I2->getOperand(0).getReg(); + unsigned I1DestReg = I1.getOperand(0).getReg(); + unsigned I2DestReg = I2.getOperand(0).getReg(); bool IsI1Loreg = (I2DestReg - I1DestReg) == 1; unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg; @@ -515,15 +573,17 @@ void HexagonCopyToCombine::combine(MachineInstr *I1, MachineInstr *I2, // Setup source operands. - MachineOperand &LoOperand = IsI1Loreg ? I1->getOperand(1) : - I2->getOperand(1); - MachineOperand &HiOperand = IsI1Loreg ? I2->getOperand(1) : - I1->getOperand(1); + MachineOperand &LoOperand = IsI1Loreg ? I1.getOperand(1) : I2.getOperand(1); + MachineOperand &HiOperand = IsI1Loreg ? I2.getOperand(1) : I1.getOperand(1); // Figure out which source is a register and which a constant. bool IsHiReg = HiOperand.isReg(); bool IsLoReg = LoOperand.isReg(); + // There is a combine of two constant extended values into CONST64. 
+ bool IsC64 = OptForSize && LoOperand.isImm() && HiOperand.isImm() && + isGreaterThanNBitTFRI<16>(I1) && isGreaterThanNBitTFRI<16>(I2); + MachineBasicBlock::iterator InsertPt(DoInsertAtI1 ? I1 : I2); // Emit combine. if (IsHiReg && IsLoReg) @@ -532,11 +592,45 @@ void HexagonCopyToCombine::combine(MachineInstr *I1, MachineInstr *I2, emitCombineRI(InsertPt, DoubleRegDest, HiOperand, LoOperand); else if (IsLoReg) emitCombineIR(InsertPt, DoubleRegDest, HiOperand, LoOperand); + else if (IsC64 && !IsConst64Disabled) + emitConst64(InsertPt, DoubleRegDest, HiOperand, LoOperand); else emitCombineII(InsertPt, DoubleRegDest, HiOperand, LoOperand); - I1->eraseFromParent(); - I2->eraseFromParent(); + // Move debug instructions along with I1 if it's being + // moved towards I2. + if (!DoInsertAtI1 && DbgMItoMove.size() != 0) { + // Insert debug instructions at the new location before I2. + MachineBasicBlock *BB = InsertPt->getParent(); + for (auto NewMI : DbgMItoMove) { + // If iterator MI is pointing to DEBUG_VAL, make sure + // MI now points to next relevant instruction. 
+ if (NewMI == (MachineInstr*)MI) + ++MI; + BB->splice(InsertPt, BB, NewMI); + } + } + + I1.eraseFromParent(); + I2.eraseFromParent(); +} + +void HexagonCopyToCombine::emitConst64(MachineBasicBlock::iterator &InsertPt, + unsigned DoubleDestReg, + MachineOperand &HiOperand, + MachineOperand &LoOperand) { + DEBUG(dbgs() << "Found a CONST64\n"); + + DebugLoc DL = InsertPt->getDebugLoc(); + MachineBasicBlock *BB = InsertPt->getParent(); + assert(LoOperand.isImm() && HiOperand.isImm() && + "Both operands must be immediate"); + + int64_t V = HiOperand.getImm(); + V = (V << 32) | (0x0ffffffffLL & LoOperand.getImm()); + BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::CONST64_Int_Real), + DoubleDestReg) + .addImm(V); } void HexagonCopyToCombine::emitCombineII(MachineBasicBlock::iterator &InsertPt, diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index ee0c318ffb5d..2665acd19fb1 100644 --- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -78,8 +78,6 @@ #include "HexagonTargetMachine.h" #include -#include -#include using namespace llvm; @@ -359,7 +357,7 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) // update the use of it after predication). PHI uses will be updated // to use a result of a MUX, and a MUX cannot be created for predicate // registers. 
- for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) { + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { if (!MO->isReg() || !MO->isDef()) continue; unsigned R = MO->getReg(); @@ -377,7 +375,7 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const { - for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + for (ConstMIOperands MO(*MI); MO.isValid(); ++MO) { if (!MO->isReg() || !MO->isUse()) continue; unsigned R = MO->getReg(); @@ -445,7 +443,7 @@ unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const { } MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg()); MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg()); - if (!TII->isPredicable(Def1) || !TII->isPredicable(Def3)) + if (!TII->isPredicable(*Def1) || !TII->isPredicable(*Def3)) Cost++; } return Cost; @@ -456,7 +454,7 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs( const MachineBasicBlock *B) const { unsigned PredDefs = 0; for (auto &MI : *B) { - for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) { + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { if (!MO->isReg() || !MO->isDef()) continue; unsigned R = MO->getReg(); @@ -721,7 +719,7 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, assert(COpc); MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc)) .addReg(PredR); - for (MIOperands MO(MI); MO.isValid(); ++MO) + for (MIOperands MO(*MI); MO.isValid(); ++MO) MIB.addOperand(*MO); // Set memory references. @@ -962,7 +960,7 @@ void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { // MRI.replaceVregUsesWith does not allow to update the subregister, // so instead of doing the use-iteration here, create a copy into a // "non-subregistered" register. 
- DebugLoc DL = PN->getDebugLoc(); + const DebugLoc &DL = PN->getDebugLoc(); const TargetRegisterClass *RC = MRI->getRegClass(DefR); NewR = MRI->createVirtualRegister(RC); NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR) @@ -980,7 +978,7 @@ void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *SB = *I; MachineBasicBlock::iterator P, N = SB->getFirstNonPHI(); for (P = SB->begin(); P != N; ++P) { - MachineInstr *PN = &*P; + MachineInstr &PN = *P; for (MIOperands MO(PN); MO.isValid(); ++MO) if (MO->isMBB() && MO->getMBB() == OldB) MO->setMBB(NewB); @@ -1034,6 +1032,9 @@ void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) { bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + auto &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); TRI = ST.getRegisterInfo(); diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp index ce10aeadef94..bd5bb9cbc235 100644 --- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -17,10 +17,10 @@ // // Liveness tracking aside, the main functionality of this pass is divided // into two steps. The first step is to replace an instruction -// vreg0 = C2_mux vreg0, vreg1, vreg2 +// vreg0 = C2_mux vreg1, vreg2, vreg3 // with a pair of conditional transfers -// vreg0 = A2_tfrt vreg0, vreg1 -// vreg0 = A2_tfrf vreg0, vreg2 +// vreg0 = A2_tfrt vreg1, vreg2 +// vreg0 = A2_tfrf vreg1, vreg3 // It is the intention that the execution of this pass could be terminated // after this step, and the code generated would be functionally correct. // @@ -60,12 +60,92 @@ // vreg3 = A2_tfrf vreg0, vreg2 // +// Splitting a definition of a register into two predicated transfers +// creates a complication in liveness tracking. 
Live interval computation +// will see both instructions as actual definitions, and will mark the +// first one as dead. The definition is not actually dead, and this +// situation will need to be fixed. For example: +// vreg1 = A2_tfrt ... ; marked as dead +// vreg1 = A2_tfrf ... +// +// Since any of the individual predicated transfers may end up getting +// removed (in case it is an identity copy), some pre-existing def may +// be marked as dead after live interval recomputation: +// vreg1 = ... ; marked as dead +// ... +// vreg1 = A2_tfrf ... ; if A2_tfrt is removed +// This case happens if vreg1 was used as a source in A2_tfrt, which means +// that it is actually live at the A2_tfrf, and so the now dead definition +// of vreg1 will need to be updated to non-dead at some point. +// +// This issue could be remedied by adding implicit uses to the predicated +// transfers, but this will create a problem with subsequent predication, +// since the transfers will no longer be possible to reorder. To avoid +// that, the initial splitting will not add any implicit uses. These +// implicit uses will be added later, after predication. The extra price, +// however, is that finding the locations where the implicit uses need +// to be added, and updating the live ranges will be more involved. +// +// An additional problem appears when subregister liveness tracking is +// enabled. In such a scenario, the live interval for the super-register +// will have live ranges for each subregister (i.e. subranges). This sub- +// range contains all liveness information about the subregister, except +// for one case: a "read-undef" flag from another subregister will not +// be reflected: given +// vreg1:subreg_hireg = ... ; "undefines" subreg_loreg +// the subrange for subreg_loreg will not have any indication that it is +// undefined at this point.
Calculating subregister liveness based only +// on the information from the subrange may create a segment which spans +// over such a "read-undef" flag. This would create inconsistencies in +// the liveness data, resulting in assertion failures or incorrect code. +// Example: +// vreg1:subreg_loreg = ... +// vreg1:subreg_hireg = ... ; "undefines" subreg_loreg +// ... +// vreg1:subreg_loreg = A2_tfrt ... ; may end up with imp-use +// ; of subreg_loreg +// The remedy takes advantage of the fact that at this point we have +// an unconditional definition of the subregister. What this means is +// that any preceding value in this subregister will be overwritten, +// or in other words, the last use before this def is a kill. This also +// implies that the first of the predicated transfers at this location +// should not have any implicit uses. +// Assume for a moment that no part of the corresponding super-register +// is used as a source. In such a case, the entire super-register can be +// considered undefined immediately before this instruction. Because of +// that, we can insert an IMPLICIT_DEF of the super-register at this +// location, which will cause it to be reflected in all the associated +// subranges. What is important here is that if an IMPLICIT_DEF of +// subreg_loreg was used, we would lose the indication that subreg_hireg +// is also considered undefined. This could lead to having implicit uses +// incorrectly added. +// +// What is left is the two cases when the super-register is used as a +// source. +// * Case 1: the used part is the same as the one that is defined: +// vreg1 = ... +// ... +// vreg1:subreg_loreg = C2_mux ..., vreg1:subreg_loreg +// In the end, the subreg_loreg should be marked as live at the point of +// the splitting: +// vreg1:subreg_loreg = A2_tfrt ; should have imp-use +// vreg1:subreg_loreg = A2_tfrf ; should have imp-use +// Hence, an IMPLICIT_DEF of only vreg1:subreg_hireg would be sufficient.
+// * Case 2: the used part does not overlap the part being defined: +// vreg1 = ... +// ... +// vreg1:subreg_loreg = C2_mux ..., vreg1:subreg_hireg +// For this case, we insert an IMPLICIT_DEF of vreg1:subreg_hireg after +// the C2_mux. + #define DEBUG_TYPE "expand-condsets" -#include "HexagonTargetMachine.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -76,6 +156,11 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + using namespace llvm; static cl::opt OptTfrLimit("expand-condsets-tfr-limit", @@ -103,22 +188,26 @@ namespace { initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); } - virtual const char *getPassName() const { + const char *getPassName() const override { return "Hexagon Expand Condsets"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: const HexagonInstrInfo *HII; const TargetRegisterInfo *TRI; + MachineDominatorTree *MDT; MachineRegisterInfo *MRI; LiveIntervals *LIS; + std::set LocalImpDefs; bool CoaLimitActive, TfrLimitActive; unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter; @@ -131,6 +220,9 @@ namespace { return Reg == RR.Reg && Sub == RR.Sub; } bool operator!= (RegisterRef RR) const { return !operator==(RR); } + bool operator< (RegisterRef RR) const { + return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub); + 
} unsigned Reg, Sub; }; @@ -138,44 +230,44 @@ namespace { enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) }; enum { Exec_Then = 0x10, Exec_Else = 0x20 }; unsigned getMaskForSub(unsigned Sub); - bool isCondset(const MachineInstr *MI); + bool isCondset(const MachineInstr &MI); + LaneBitmask getLaneMask(unsigned Reg, unsigned Sub); void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec); bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec); - LiveInterval::iterator nextSegment(LiveInterval &LI, SlotIndex S); - LiveInterval::iterator prevSegment(LiveInterval &LI, SlotIndex S); - void makeDefined(unsigned Reg, SlotIndex S, bool SetDef); - void makeUndead(unsigned Reg, SlotIndex S); - void shrinkToUses(unsigned Reg, LiveInterval &LI); - void updateKillFlags(unsigned Reg, LiveInterval &LI); - void terminateSegment(LiveInterval::iterator LT, SlotIndex S, - LiveInterval &LI); - void addInstrToLiveness(MachineInstr *MI); - void removeInstrFromLiveness(MachineInstr *MI); + void removeImpDefSegments(LiveRange &Range); + void updateDeadsInRange(unsigned Reg, LaneBitmask LM, LiveRange &Range); + void updateKillFlags(unsigned Reg); + void updateDeadFlags(unsigned Reg); + void recalculateLiveInterval(unsigned Reg); + void removeInstr(MachineInstr &MI); + void updateLiveness(std::set &RegSet, bool Recalc, + bool UpdateKills, bool UpdateDeads); unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond); - MachineInstr *genTfrFor(MachineOperand &SrcOp, unsigned DstR, - unsigned DstSR, const MachineOperand &PredOp, bool Cond); - bool split(MachineInstr *MI); - bool splitInBlock(MachineBasicBlock &B); + MachineInstr *genCondTfrFor(MachineOperand &SrcOp, + MachineBasicBlock::iterator At, unsigned DstR, + unsigned DstSR, const MachineOperand &PredOp, bool PredSense, + bool ReadUndef, bool ImpUse); + bool split(MachineInstr &MI, std::set &UpdRegs); + bool splitInBlock(MachineBasicBlock &B, std::set &UpdRegs); bool isPredicable(MachineInstr 
*MI); MachineInstr *getReachingDefForPred(RegisterRef RD, MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond); - bool canMoveOver(MachineInstr *MI, ReferenceMap &Defs, ReferenceMap &Uses); - bool canMoveMemTo(MachineInstr *MI, MachineInstr *ToI, bool IsDown); - void predicateAt(RegisterRef RD, MachineInstr *MI, - MachineBasicBlock::iterator Where, unsigned PredR, bool Cond); + bool canMoveOver(MachineInstr &MI, ReferenceMap &Defs, ReferenceMap &Uses); + bool canMoveMemTo(MachineInstr &MI, MachineInstr &ToI, bool IsDown); + void predicateAt(const MachineOperand &DefOp, MachineInstr &MI, + MachineBasicBlock::iterator Where, + const MachineOperand &PredOp, bool Cond, + std::set &UpdRegs); void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR, bool Cond, MachineBasicBlock::iterator First, MachineBasicBlock::iterator Last); - bool predicate(MachineInstr *TfrI, bool Cond); - bool predicateInBlock(MachineBasicBlock &B); - - void postprocessUndefImplicitUses(MachineBasicBlock &B); - void removeImplicitUses(MachineInstr *MI); - void removeImplicitUses(MachineBasicBlock &B); + bool predicate(MachineInstr &TfrI, bool Cond, std::set &UpdRegs); + bool predicateInBlock(MachineBasicBlock &B, + std::set &UpdRegs); bool isIntReg(RegisterRef RR, unsigned &BW); bool isIntraBlocks(LiveInterval &LI); @@ -186,6 +278,13 @@ namespace { char HexagonExpandCondsets::ID = 0; +INITIALIZE_PASS_BEGIN(HexagonExpandCondsets, "expand-condsets", + "Hexagon Expand Condsets", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(HexagonExpandCondsets, "expand-condsets", + "Hexagon Expand Condsets", false, false) unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { switch (Sub) { @@ -199,9 +298,8 @@ unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { llvm_unreachable("Invalid subregister"); } - -bool HexagonExpandCondsets::isCondset(const 
MachineInstr *MI) { - unsigned Opc = MI->getOpcode(); +bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); switch (Opc) { case Hexagon::C2_mux: case Hexagon::C2_muxii: @@ -215,6 +313,13 @@ bool HexagonExpandCondsets::isCondset(const MachineInstr *MI) { } +LaneBitmask HexagonExpandCondsets::getLaneMask(unsigned Reg, unsigned Sub) { + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + return Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub) + : MRI->getMaxLaneMaskForVReg(Reg); +} + + void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec) { unsigned Mask = getMaskForSub(RR.Sub) | Exec; @@ -238,408 +343,231 @@ bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, } -LiveInterval::iterator HexagonExpandCondsets::nextSegment(LiveInterval &LI, - SlotIndex S) { - for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { - if (I->start >= S) - return I; - } - return LI.end(); -} - - -LiveInterval::iterator HexagonExpandCondsets::prevSegment(LiveInterval &LI, - SlotIndex S) { - LiveInterval::iterator P = LI.end(); - for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { - if (I->end > S) - return P; - P = I; - } - return P; -} - - -/// Find the implicit use of register Reg in slot index S, and make sure -/// that the "defined" flag is set to SetDef. While the mux expansion is -/// going on, predicated instructions will have implicit uses of the -/// registers that are being defined. This is to keep any preceding -/// definitions live. If there is no preceding definition, the implicit -/// use will be marked as "undef", otherwise it will be "defined". This -/// function is used to update the flag. 
-void HexagonExpandCondsets::makeDefined(unsigned Reg, SlotIndex S, - bool SetDef) { - if (!S.isRegister()) - return; - MachineInstr *MI = LIS->getInstructionFromIndex(S); - assert(MI && "Expecting instruction"); - for (auto &Op : MI->operands()) { - if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) - continue; - bool IsDef = !Op.isUndef(); - if (Op.isImplicit() && IsDef != SetDef) - Op.setIsUndef(!SetDef); - } -} - - -void HexagonExpandCondsets::makeUndead(unsigned Reg, SlotIndex S) { - // If S is a block boundary, then there can still be a dead def reaching - // this point. Instead of traversing the CFG, queue start points of all - // live segments that begin with a register, and end at a block boundary. - // This may "resurrect" some truly dead definitions, but doing so is - // harmless. - SmallVector Defs; - if (S.isBlock()) { - LiveInterval &LI = LIS->getInterval(Reg); - for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { - if (!I->start.isRegister() || !I->end.isBlock()) - continue; - MachineInstr *MI = LIS->getInstructionFromIndex(I->start); - Defs.push_back(MI); - } - } else if (S.isRegister()) { - MachineInstr *MI = LIS->getInstructionFromIndex(S); - Defs.push_back(MI); - } - - for (unsigned i = 0, n = Defs.size(); i < n; ++i) { - MachineInstr *MI = Defs[i]; +void HexagonExpandCondsets::updateKillFlags(unsigned Reg) { + auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void { + // Set the flag on a use of Reg whose lane mask is contained in LM. + MachineInstr *MI = LIS->getInstructionFromIndex(K); for (auto &Op : MI->operands()) { - if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) - continue; - Op.setIsDead(false); - } - } -} - - -/// Shrink the segments in the live interval for a given register to the last -/// use before each subsequent def. Unlike LiveIntervals::shrinkToUses, this -/// function will not mark any definitions of Reg as dead. 
The reason for this -/// is that this function is used while a MUX instruction is being expanded, -/// or while a conditional copy is undergoing predication. During these -/// processes, there may be defs present in the instruction sequence that have -/// not yet been removed, or there may be missing uses that have not yet been -/// added. We want to utilize LiveIntervals::shrinkToUses as much as possible, -/// but since it does not extend any intervals that are too short, we need to -/// pre-emptively extend them here in anticipation of further changes. -void HexagonExpandCondsets::shrinkToUses(unsigned Reg, LiveInterval &LI) { - SmallVector Deads; - LIS->shrinkToUses(&LI, &Deads); - // Need to undo the deadification made by "shrinkToUses". It's easier to - // do it here, since we have a list of all instructions that were just - // marked as dead. - for (unsigned i = 0, n = Deads.size(); i < n; ++i) { - MachineInstr *MI = Deads[i]; - // Clear the "dead" flag. - for (auto &Op : MI->operands()) { - if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) continue; - Op.setIsDead(false); + LaneBitmask SLM = getLaneMask(Reg, Op.getSubReg()); + if ((SLM & LM) == SLM) { + // Only set the kill flag on the first encountered use of Reg in this + // instruction. + Op.setIsKill(true); + break; + } } - // Extend the live segment to the beginning of the next one. - LiveInterval::iterator End = LI.end(); - SlotIndex S = LIS->getInstructionIndex(MI).getRegSlot(); - LiveInterval::iterator T = LI.FindSegmentContaining(S); - assert(T != End); - LiveInterval::iterator N = std::next(T); - if (N != End) - T->end = N->start; - else - T->end = LIS->getMBBEndIdx(MI->getParent()); - } - updateKillFlags(Reg, LI); -} - + }; -/// Given an updated live interval LI for register Reg, update the kill flags -/// in instructions using Reg to reflect the liveness changes. 
-void HexagonExpandCondsets::updateKillFlags(unsigned Reg, LiveInterval &LI) { - MRI->clearKillFlags(Reg); - for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { - SlotIndex EX = I->end; - if (!EX.isRegister()) + LiveInterval &LI = LIS->getInterval(Reg); + for (auto I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->end.isRegister()) continue; - MachineInstr *MI = LIS->getInstructionFromIndex(EX); - for (auto &Op : MI->operands()) { - if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + // Do not mark the end of the segment as , if the next segment + // starts with a predicated instruction. + auto NextI = std::next(I); + if (NextI != E && NextI->start.isRegister()) { + MachineInstr *DefI = LIS->getInstructionFromIndex(NextI->start); + if (HII->isPredicated(*DefI)) continue; - // Only set the kill flag on the first encountered use of Reg in this - // instruction. - Op.setIsKill(true); - break; } + bool WholeReg = true; + if (LI.hasSubRanges()) { + auto EndsAtI = [I] (LiveInterval::SubRange &S) -> bool { + LiveRange::iterator F = S.find(I->end); + return F != S.end() && I->end == F->end; + }; + // Check if all subranges end at I->end. If so, make sure to kill + // the whole register. + for (LiveInterval::SubRange &S : LI.subranges()) { + if (EndsAtI(S)) + KillAt(I->end, S.LaneMask); + else + WholeReg = false; + } + } + if (WholeReg) + KillAt(I->end, MRI->getMaxLaneMaskForVReg(Reg)); } } -/// When adding a new instruction to liveness, the newly added definition -/// will start a new live segment. This may happen at a position that falls -/// within an existing live segment. In such case that live segment needs to -/// be truncated to make room for the new segment. Ultimately, the truncation -/// will occur at the last use, but for now the segment can be terminated -/// right at the place where the new segment will start. The segments will be -/// shrunk-to-uses later. 
-void HexagonExpandCondsets::terminateSegment(LiveInterval::iterator LT, - SlotIndex S, LiveInterval &LI) { - // Terminate the live segment pointed to by LT within a live interval LI. - if (LT == LI.end()) - return; +void HexagonExpandCondsets::removeImpDefSegments(LiveRange &Range) { + auto StartImpDef = [this] (LiveRange::Segment &S) -> bool { + return S.start.isRegister() && + LocalImpDefs.count(LIS->getInstructionFromIndex(S.start)); + }; + Range.segments.erase(std::remove_if(Range.begin(), Range.end(), StartImpDef), + Range.end()); +} - VNInfo *OldVN = LT->valno; - SlotIndex EX = LT->end; - LT->end = S; - // If LT does not end at a block boundary, the termination is done. - if (!EX.isBlock()) +void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, + LiveRange &Range) { + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + if (Range.empty()) return; - // If LT ended at a block boundary, it's possible that its value number - // is picked up at the beginning other blocks. Create a new value number - // and change such blocks to use it instead. - VNInfo *NewVN = 0; - for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { - if (!I->start.isBlock() || I->valno != OldVN) - continue; - // Generate on-demand a new value number that is defined by the - // block beginning (i.e. -phi). - if (!NewVN) - NewVN = LI.getNextValue(I->start, LIS->getVNInfoAllocator()); - I->valno = NewVN; - } -} + auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> bool { + if (!Op.isReg() || !Op.isDef()) + return false; + unsigned DR = Op.getReg(), DSR = Op.getSubReg(); + if (!TargetRegisterInfo::isVirtualRegister(DR) || DR != Reg) + return false; + LaneBitmask SLM = getLaneMask(DR, DSR); + return (SLM & LM) != 0; + }; + // The splitting step will create pairs of predicated definitions without + // any implicit uses (since implicit uses would interfere with predication). 
+ // This can cause the reaching defs to become dead after live range + // recomputation, even though they are not really dead. + // We need to identify predicated defs that need implicit uses, and + // dead defs that are not really dead, and correct both problems. + + SetVector Defs; + auto Dominate = [this] (SetVector &Defs, + MachineBasicBlock *Dest) -> bool { + for (MachineBasicBlock *D : Defs) + if (D != Dest && MDT->dominates(D, Dest)) + return true; -/// Add the specified instruction to live intervals. This function is used -/// to update the live intervals while the program code is being changed. -/// Neither the expansion of a MUX, nor the predication are atomic, and this -/// function is used to update the live intervals while these transformations -/// are being done. -void HexagonExpandCondsets::addInstrToLiveness(MachineInstr *MI) { - SlotIndex MX = LIS->isNotInMIMap(MI) ? LIS->InsertMachineInstrInMaps(MI) - : LIS->getInstructionIndex(MI); - DEBUG(dbgs() << "adding liveness info for instr\n " << MX << " " << *MI); - - MX = MX.getRegSlot(); - bool Predicated = HII->isPredicated(MI); - MachineBasicBlock *MB = MI->getParent(); - - // Strip all implicit uses from predicated instructions. They will be - // added again, according to the updated information. - if (Predicated) - removeImplicitUses(MI); - - // For each def in MI we need to insert a new live segment starting at MX - // into the interval. If there already exists a live segment in the interval - // that contains MX, we need to terminate it at MX. - SmallVector Defs; - for (auto &Op : MI->operands()) - if (Op.isReg() && Op.isDef()) - Defs.push_back(RegisterRef(Op)); - - for (unsigned i = 0, n = Defs.size(); i < n; ++i) { - unsigned DefR = Defs[i].Reg; - LiveInterval &LID = LIS->getInterval(DefR); - DEBUG(dbgs() << "adding def " << PrintReg(DefR, TRI) - << " with interval\n " << LID << "\n"); - // If MX falls inside of an existing live segment, terminate it. 
- LiveInterval::iterator LT = LID.FindSegmentContaining(MX); - if (LT != LID.end()) - terminateSegment(LT, MX, LID); - DEBUG(dbgs() << "after terminating segment\n " << LID << "\n"); - - // Create a new segment starting from MX. - LiveInterval::iterator P = prevSegment(LID, MX), N = nextSegment(LID, MX); - SlotIndex EX; - VNInfo *VN = LID.getNextValue(MX, LIS->getVNInfoAllocator()); - if (N == LID.end()) { - // There is no live segment after MX. End this segment at the end of - // the block. - EX = LIS->getMBBEndIdx(MB); - } else { - // If the next segment starts at the block boundary, end the new segment - // at the boundary of the preceding block (i.e. the previous index). - // Otherwise, end the segment at the beginning of the next segment. In - // either case it will be "shrunk-to-uses" later. - EX = N->start.isBlock() ? N->start.getPrevIndex() : N->start; + MachineBasicBlock *Entry = &Dest->getParent()->front(); + SetVector Work(Dest->pred_begin(), Dest->pred_end()); + for (unsigned i = 0; i < Work.size(); ++i) { + MachineBasicBlock *B = Work[i]; + if (Defs.count(B)) + continue; + if (B == Entry) + return false; + for (auto *P : B->predecessors()) + Work.insert(P); } - if (Predicated) { - // Predicated instruction will have an implicit use of the defined - // register. This is necessary so that this definition will not make - // any previous definitions dead. If there are no previous live - // segments, still add the implicit use, but make it "undef". - // Because of the implicit use, the preceding definition is not - // dead. Mark is as such (if necessary). - MachineOperand ImpUse = MachineOperand::CreateReg(DefR, false, true); - ImpUse.setSubReg(Defs[i].Sub); - bool Undef = false; - if (P == LID.end()) - Undef = true; - else { - // If the previous segment extends to the end of the previous block, - // the end index may actually be the beginning of this block. 
If - // the previous segment ends at a block boundary, move it back by one, - // to get the proper block for it. - SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end; - MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); - if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) - Undef = true; - } - if (!Undef) { - makeUndead(DefR, P->valno->def); - // We are adding a live use, so extend the previous segment to - // include it. - P->end = MX; - } else { - ImpUse.setIsUndef(true); - } + return true; + }; - if (!MI->readsRegister(DefR)) - MI->addOperand(ImpUse); - if (N != LID.end()) - makeDefined(DefR, N->start, true); - } - LiveRange::Segment NR = LiveRange::Segment(MX, EX, VN); - LID.addSegment(NR); - DEBUG(dbgs() << "added a new segment " << NR << "\n " << LID << "\n"); - shrinkToUses(DefR, LID); - DEBUG(dbgs() << "updated imp-uses: " << *MI); - LID.verify(); + // First, try to extend live range within individual basic blocks. This + // will leave us only with dead defs that do not reach any predicated + // defs in the same block. + SmallVector PredDefs; + for (auto &Seg : Range) { + if (!Seg.start.isRegister()) + continue; + MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); + if (LocalImpDefs.count(DefI)) + continue; + Defs.insert(DefI->getParent()); + if (HII->isPredicated(*DefI)) + PredDefs.push_back(Seg.start); + } + for (auto &SI : PredDefs) { + MachineBasicBlock *BB = LIS->getMBBFromIndex(SI); + if (Range.extendInBlock(LIS->getMBBStartIdx(BB), SI)) + SI = SlotIndex(); } - // For each use in MI: - // - If there is no live segment that contains MX for the used register, - // extend the previous one. Ignore implicit uses. - for (auto &Op : MI->operands()) { - if (!Op.isReg() || !Op.isUse() || Op.isImplicit() || Op.isUndef()) + // Calculate reachability for those predicated defs that were not handled + // by the in-block extension. 
+ SmallVector ExtTo; + for (auto &SI : PredDefs) { + if (!SI.isValid()) + continue; + MachineBasicBlock *BB = LIS->getMBBFromIndex(SI); + if (BB->pred_empty()) + continue; + // If the defs from this range reach SI via all predecessors, it is live. + if (Dominate(Defs, BB)) + ExtTo.push_back(SI); + } + LIS->extendToIndices(Range, ExtTo); + + // Remove flags from all defs that are not dead after live range + // extension, and collect all def operands. They will be used to generate + // the necessary implicit uses. + std::set DefRegs; + for (auto &Seg : Range) { + if (!Seg.start.isRegister()) continue; - unsigned UseR = Op.getReg(); - LiveInterval &LIU = LIS->getInterval(UseR); - // Find the last segment P that starts before MX. - LiveInterval::iterator P = LIU.FindSegmentContaining(MX); - if (P == LIU.end()) - P = prevSegment(LIU, MX); - - assert(P != LIU.end() && "MI uses undefined register?"); - SlotIndex EX = P->end; - // If P contains MX, there is not much to do. - if (EX > MX) { - Op.setIsKill(false); + MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); + if (LocalImpDefs.count(DefI)) continue; + for (auto &Op : DefI->operands()) { + if (Seg.start.isDead() || !IsRegDef(Op)) + continue; + DefRegs.insert(Op); + Op.setIsDead(false); } - // Otherwise, extend P to "next(MX)". - P->end = MX.getNextIndex(); - Op.setIsKill(true); - // Get the old "kill" instruction, and remove the kill flag. - if (MachineInstr *KI = LIS->getInstructionFromIndex(MX)) - KI->clearRegisterKills(UseR, nullptr); - shrinkToUses(UseR, LIU); - LIU.verify(); } -} -/// Update the live interval information to reflect the removal of the given -/// instruction from the program. As with "addInstrToLiveness", this function -/// is called while the program code is being changed. 
-void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) { - SlotIndex MX = LIS->getInstructionIndex(MI).getRegSlot(); - DEBUG(dbgs() << "removing instr\n " << MX << " " << *MI); + // Finally, add implicit uses to each predicated def that is reached + // by other defs. Remove segments started by implicit-defs first, since + // they do not define registers. + removeImpDefSegments(Range); - // For each def in MI: - // If MI starts a live segment, merge this segment with the previous segment. - // - for (auto &Op : MI->operands()) { - if (!Op.isReg() || !Op.isDef()) + for (auto &Seg : Range) { + if (!Seg.start.isRegister() || !Range.liveAt(Seg.start.getPrevSlot())) continue; - unsigned DefR = Op.getReg(); - LiveInterval &LID = LIS->getInterval(DefR); - LiveInterval::iterator LT = LID.FindSegmentContaining(MX); - assert(LT != LID.end() && "Expecting live segments"); - DEBUG(dbgs() << "removing def at " << MX << " of " << PrintReg(DefR, TRI) - << " with interval\n " << LID << "\n"); - if (LT->start != MX) + MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); + if (!HII->isPredicated(*DefI)) continue; + MachineFunction &MF = *DefI->getParent()->getParent(); + // Construct the set of all necessary implicit uses, based on the def + // operands in the instruction. + std::set ImpUses; + for (auto &Op : DefI->operands()) + if (Op.isReg() && Op.isDef() && DefRegs.count(Op)) + ImpUses.insert(Op); + for (RegisterRef R : ImpUses) + MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub); + } +} - VNInfo *MVN = LT->valno; - if (LT != LID.begin()) { - // If the current live segment is not the first, the task is easy. If - // the previous segment continues into the current block, extend it to - // the end of the current one, and merge the value numbers. - // Otherwise, remove the current segment, and make the end of it "undef". - LiveInterval::iterator P = std::prev(LT); - SlotIndex PE = P->end.isBlock() ? 
P->end.getPrevIndex() : P->end; - MachineBasicBlock *MB = MI->getParent(); - MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); - if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) { - makeDefined(DefR, LT->end, false); - LID.removeSegment(*LT); - } else { - // Make the segments adjacent, so that merge-vn can also merge the - // segments. - P->end = LT->start; - makeUndead(DefR, P->valno->def); - LID.MergeValueNumberInto(MVN, P->valno); - } - } else { - LiveInterval::iterator N = std::next(LT); - LiveInterval::iterator RmB = LT, RmE = N; - while (N != LID.end()) { - // Iterate until the first register-based definition is found - // (i.e. skip all block-boundary entries). - LiveInterval::iterator Next = std::next(N); - if (N->start.isRegister()) { - makeDefined(DefR, N->start, false); - break; - } - if (N->end.isRegister()) { - makeDefined(DefR, N->end, false); - RmE = Next; - break; - } - RmE = Next; - N = Next; - } - // Erase the segments in one shot to avoid invalidating iterators. - LID.segments.erase(RmB, RmE); - } - - bool VNUsed = false; - for (LiveInterval::iterator I = LID.begin(), E = LID.end(); I != E; ++I) { - if (I->valno != MVN) - continue; - VNUsed = true; - break; - } - if (!VNUsed) - MVN->markUnused(); - DEBUG(dbgs() << "new interval: "); - if (!LID.empty()) { - DEBUG(dbgs() << LID << "\n"); - LID.verify(); - } else { - DEBUG(dbgs() << "\n"); - LIS->removeInterval(DefR); +void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) { + LiveInterval &LI = LIS->getInterval(Reg); + if (LI.hasSubRanges()) { + for (LiveInterval::SubRange &S : LI.subranges()) { + updateDeadsInRange(Reg, S.LaneMask, S); + LIS->shrinkToUses(S, Reg); + // LI::shrinkToUses will add segments started by implicit-defs. + // Remove them again. + removeImpDefSegments(S); } + LI.clear(); + LIS->constructMainRangeFromSubranges(LI); + } else { + updateDeadsInRange(Reg, MRI->getMaxLaneMaskForVReg(Reg), LI); } +} - // For uses there is nothing to do. 
The intervals will be updated via - // shrinkToUses. - SmallVector Uses; - for (auto &Op : MI->operands()) { - if (!Op.isReg() || !Op.isUse()) - continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) - continue; - Uses.push_back(R); - } + +void HexagonExpandCondsets::recalculateLiveInterval(unsigned Reg) { + LIS->removeInterval(Reg); + LIS->createAndComputeVirtRegInterval(Reg); +} + +void HexagonExpandCondsets::removeInstr(MachineInstr &MI) { LIS->RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - for (unsigned i = 0, n = Uses.size(); i < n; ++i) { - LiveInterval &LI = LIS->getInterval(Uses[i]); - shrinkToUses(Uses[i], LI); + MI.eraseFromParent(); +} + + +void HexagonExpandCondsets::updateLiveness(std::set &RegSet, + bool Recalc, bool UpdateKills, bool UpdateDeads) { + UpdateKills |= UpdateDeads; + for (auto R : RegSet) { + if (Recalc) + recalculateLiveInterval(R); + if (UpdateKills) + MRI->clearKillFlags(R); + if (UpdateDeads) + updateDeadFlags(R); + // Fixing flags may extend live ranges, so reset flags + // after that. + if (UpdateKills) + updateKillFlags(R); + LIS->getInterval(R).verify(); } } @@ -647,7 +575,7 @@ void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) { /// Get the opcode for a conditional transfer of the value in SO (source /// operand). The condition (true/false) is given in Cond. unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, - bool Cond) { + bool IfTrue) { using namespace Hexagon; if (SO.isReg()) { unsigned PhysR; @@ -664,14 +592,14 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS); switch (RC->getSize()) { case 4: - return Cond ? A2_tfrt : A2_tfrf; + return IfTrue ? A2_tfrt : A2_tfrf; case 8: - return Cond ? A2_tfrpt : A2_tfrpf; + return IfTrue ? A2_tfrpt : A2_tfrpf; } llvm_unreachable("Invalid register operand"); } if (SO.isImm() || SO.isFPImm()) - return Cond ? 
C2_cmoveit : C2_cmoveif; + return IfTrue ? C2_cmoveit : C2_cmoveif; llvm_unreachable("Unexpected source operand"); } @@ -680,12 +608,13 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, /// destination register DstR:DstSR, and using the predicate register from /// PredOp. The Cond argument specifies whether the predicate is to be /// if(PredOp), or if(!PredOp). -MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp, - unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, bool Cond) { +MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp, + MachineBasicBlock::iterator At, + unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, + bool PredSense, bool ReadUndef, bool ImpUse) { MachineInstr *MI = SrcOp.getParent(); - MachineBasicBlock &B = *MI->getParent(); - MachineBasicBlock::iterator At = MI; - DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock &B = *At->getParent(); + const DebugLoc &DL = MI->getDebugLoc(); // Don't avoid identity copies here (i.e. if the source and the destination // are the same registers). It is actually better to generate them here, @@ -693,62 +622,101 @@ MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp, // step. The predication will remove such a copy if it is unable to /// predicate. - unsigned Opc = getCondTfrOpcode(SrcOp, Cond); - MachineInstr *TfrI = BuildMI(B, At, DL, HII->get(Opc)) - .addReg(DstR, RegState::Define, DstSR) + unsigned Opc = getCondTfrOpcode(SrcOp, PredSense); + unsigned State = RegState::Define | (ReadUndef ? RegState::Undef : 0); + MachineInstrBuilder MIB = BuildMI(B, At, DL, HII->get(Opc)) + .addReg(DstR, State, DstSR) .addOperand(PredOp) .addOperand(SrcOp); + // We don't want any kills yet. 
- TfrI->clearKillInfo(); - DEBUG(dbgs() << "created an initial copy: " << *TfrI); - return TfrI; + MIB->clearKillInfo(); + DEBUG(dbgs() << "created an initial copy: " << *MIB); + return &*MIB; } /// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function /// performs all necessary changes to complete the replacement. -bool HexagonExpandCondsets::split(MachineInstr *MI) { +bool HexagonExpandCondsets::split(MachineInstr &MI, + std::set &UpdRegs) { if (TfrLimitActive) { if (TfrCounter >= TfrLimit) return false; TfrCounter++; } - DEBUG(dbgs() << "\nsplitting BB#" << MI->getParent()->getNumber() - << ": " << *MI); - MachineOperand &MD = MI->getOperand(0); // Definition - MachineOperand &MP = MI->getOperand(1); // Predicate register + DEBUG(dbgs() << "\nsplitting BB#" << MI.getParent()->getNumber() << ": " + << MI); + MachineOperand &MD = MI.getOperand(0); // Definition + MachineOperand &MP = MI.getOperand(1); // Predicate register + MachineOperand &MS1 = MI.getOperand(2); // Source value #1 + MachineOperand &MS2 = MI.getOperand(3); // Source value #2 assert(MD.isDef()); unsigned DR = MD.getReg(), DSR = MD.getSubReg(); + bool ReadUndef = MD.isUndef(); + MachineBasicBlock::iterator At = MI; + + if (ReadUndef && DSR != 0 && MRI->shouldTrackSubRegLiveness(DR)) { + unsigned NewSR = 0; + MachineBasicBlock::iterator DefAt = At; + bool SameReg = (MS1.isReg() && DR == MS1.getReg()) || + (MS2.isReg() && DR == MS2.getReg()); + if (SameReg) { + NewSR = (DSR == Hexagon::subreg_loreg) ? Hexagon::subreg_hireg + : Hexagon::subreg_loreg; + // Advance the insertion point if the subregisters differ between + // the source and the target (with the same super-register). + // Note: this case has never occured during tests. + if ((MS1.isReg() && NewSR == MS1.getSubReg()) || + (MS2.isReg() && NewSR == MS2.getSubReg())) + ++DefAt; + } + // Use "At", since "DefAt" may be end(). 
+ MachineBasicBlock &B = *At->getParent(); + DebugLoc DL = At->getDebugLoc(); + auto ImpD = BuildMI(B, DefAt, DL, HII->get(TargetOpcode::IMPLICIT_DEF)) + .addReg(DR, RegState::Define, NewSR); + LIS->InsertMachineInstrInMaps(*ImpD); + LocalImpDefs.insert(&*ImpD); + } // First, create the two invididual conditional transfers, and add each // of them to the live intervals information. Do that first and then remove // the old instruction from live intervals. - if (MachineInstr *TfrT = genTfrFor(MI->getOperand(2), DR, DSR, MP, true)) - addInstrToLiveness(TfrT); - if (MachineInstr *TfrF = genTfrFor(MI->getOperand(3), DR, DSR, MP, false)) - addInstrToLiveness(TfrF); - removeInstrFromLiveness(MI); - + MachineInstr *TfrT = + genCondTfrFor(MI.getOperand(2), At, DR, DSR, MP, true, ReadUndef, false); + MachineInstr *TfrF = + genCondTfrFor(MI.getOperand(3), At, DR, DSR, MP, false, ReadUndef, true); + LIS->InsertMachineInstrInMaps(*TfrT); + LIS->InsertMachineInstrInMaps(*TfrF); + + // Will need to recalculate live intervals for all registers in MI. + for (auto &Op : MI.operands()) + if (Op.isReg()) + UpdRegs.insert(Op.getReg()); + + removeInstr(MI); return true; } -/// Split all MUX instructions in the given block into pairs of contitional +/// Split all MUX instructions in the given block into pairs of conditional /// transfers. 
-bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B) { +bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B, + std::set &UpdRegs) { bool Changed = false; MachineBasicBlock::iterator I, E, NextI; for (I = B.begin(), E = B.end(); I != E; I = NextI) { NextI = std::next(I); - if (isCondset(I)) - Changed |= split(I); + if (isCondset(*I)) + Changed |= split(*I, UpdRegs); } return Changed; } bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) { - if (HII->isPredicated(MI) || !HII->isPredicable(MI)) + if (HII->isPredicated(*MI) || !HII->isPredicable(*MI)) return false; if (MI->hasUnmodeledSideEffects() || MI->mayStore()) return false; @@ -784,8 +752,8 @@ MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, MachineInstr *MI = &*I; // Check if this instruction can be ignored, i.e. if it is predicated // on the complementary condition. - if (PredValid && HII->isPredicated(MI)) { - if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(MI))) + if (PredValid && HII->isPredicated(*MI)) { + if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(*MI))) continue; } @@ -821,12 +789,12 @@ MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, /// the maps Defs and Uses. These maps reflect the conditional defs and uses /// that depend on the same predicate register to allow moving instructions /// over instructions predicated on the opposite condition. -bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs, - ReferenceMap &Uses) { +bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs, + ReferenceMap &Uses) { // In order to be able to safely move MI over instructions that define // "Defs" and use "Uses", no def operand from MI can be defined or used // and no use operand can be defined. 
- for (auto &Op : MI->operands()) { + for (auto &Op : MI.operands()) { if (!Op.isReg()) continue; RegisterRef RR = Op; @@ -848,19 +816,19 @@ bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs, /// Check if the instruction accessing memory (TheI) can be moved to the /// location ToI. -bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI, - bool IsDown) { - bool IsLoad = TheI->mayLoad(), IsStore = TheI->mayStore(); +bool HexagonExpandCondsets::canMoveMemTo(MachineInstr &TheI, MachineInstr &ToI, + bool IsDown) { + bool IsLoad = TheI.mayLoad(), IsStore = TheI.mayStore(); if (!IsLoad && !IsStore) return true; if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI)) return true; - if (TheI->hasUnmodeledSideEffects()) + if (TheI.hasUnmodeledSideEffects()) return false; MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI; MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI; - bool Ordered = TheI->hasOrderedMemoryRef(); + bool Ordered = TheI.hasOrderedMemoryRef(); // Search for aliased memory reference in (StartI, EndI). for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) { @@ -883,8 +851,11 @@ bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI, /// Generate a predicated version of MI (where the condition is given via /// PredR and Cond) at the point indicated by Where. -void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI, - MachineBasicBlock::iterator Where, unsigned PredR, bool Cond) { +void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp, + MachineInstr &MI, + MachineBasicBlock::iterator Where, + const MachineOperand &PredOp, bool Cond, + std::set &UpdRegs) { // The problem with updating live intervals is that we can move one def // past another def. In particular, this can happen when moving an A2_tfrt // over an A2_tfrf defining the same register. 
From the point of view of @@ -896,33 +867,34 @@ void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI, // target location, (2) update liveness, (3) delete the old instruction, // and (4) update liveness again. - MachineBasicBlock &B = *MI->getParent(); + MachineBasicBlock &B = *MI.getParent(); DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction. - unsigned Opc = MI->getOpcode(); + unsigned Opc = MI.getOpcode(); unsigned PredOpc = HII->getCondOpcode(Opc, !Cond); MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc)); - unsigned Ox = 0, NP = MI->getNumOperands(); + unsigned Ox = 0, NP = MI.getNumOperands(); // Skip all defs from MI first. while (Ox < NP) { - MachineOperand &MO = MI->getOperand(Ox); + MachineOperand &MO = MI.getOperand(Ox); if (!MO.isReg() || !MO.isDef()) break; Ox++; } // Add the new def, then the predicate register, then the rest of the // operands. - MB.addReg(RD.Reg, RegState::Define, RD.Sub); - MB.addReg(PredR); + MB.addReg(DefOp.getReg(), getRegState(DefOp), DefOp.getSubReg()); + MB.addReg(PredOp.getReg(), PredOp.isUndef() ? 
RegState::Undef : 0, + PredOp.getSubReg()); while (Ox < NP) { - MachineOperand &MO = MI->getOperand(Ox); + MachineOperand &MO = MI.getOperand(Ox); if (!MO.isReg() || !MO.isImplicit()) MB.addOperand(MO); Ox++; } MachineFunction &MF = *B.getParent(); - MachineInstr::mmo_iterator I = MI->memoperands_begin(); - unsigned NR = std::distance(I, MI->memoperands_end()); + MachineInstr::mmo_iterator I = MI.memoperands_begin(); + unsigned NR = std::distance(I, MI.memoperands_end()); MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR); for (unsigned i = 0; i < NR; ++i) MemRefs[i] = *I++; @@ -930,7 +902,11 @@ void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI, MachineInstr *NewI = MB; NewI->clearKillInfo(); - addInstrToLiveness(NewI); + LIS->InsertMachineInstrInMaps(*NewI); + + for (auto &Op : NewI->operands()) + if (Op.isReg()) + UpdRegs.insert(Op.getReg()); } @@ -945,9 +921,9 @@ void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, MachineInstr *MI = &*I; // Do not touch instructions that are not predicated, or are predicated // on the opposite condition. - if (!HII->isPredicated(MI)) + if (!HII->isPredicated(*MI)) continue; - if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(MI))) + if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(*MI))) continue; for (auto &Op : MI->operands()) { @@ -965,22 +941,27 @@ void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, /// For a given conditional copy, predicate the definition of the source of /// the copy under the given condition (using the same predicate register as /// the copy). -bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) { +bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, + std::set &UpdRegs) { // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi). 
- unsigned Opc = TfrI->getOpcode(); + unsigned Opc = TfrI.getOpcode(); (void)Opc; assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf); DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false") - << ": " << *TfrI); + << ": " << TfrI); - MachineOperand &MD = TfrI->getOperand(0); - MachineOperand &MP = TfrI->getOperand(1); - MachineOperand &MS = TfrI->getOperand(2); + MachineOperand &MD = TfrI.getOperand(0); + MachineOperand &MP = TfrI.getOperand(1); + MachineOperand &MS = TfrI.getOperand(2); // The source operand should be a . This is not strictly necessary, // but it makes things a lot simpler. Otherwise, we would need to rename // some registers, which would complicate the transformation considerably. if (!MS.isKill()) return false; + // Avoid predicating instructions that define a subregister if subregister + // liveness tracking is not enabled. + if (MD.getSubReg() && !MRI->shouldTrackSubRegLiveness(MD.getReg())) + return false; RegisterRef RT(MS); unsigned PredR = MP.getReg(); @@ -1014,8 +995,8 @@ bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) { // By default assume that the instruction executes on the same condition // as TfrI (Exec_Then), and also on the opposite one (Exec_Else). unsigned Exec = Exec_Then | Exec_Else; - if (PredValid && HII->isPredicated(MI) && MI->readsRegister(PredR)) - Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else; + if (PredValid && HII->isPredicated(*MI) && MI->readsRegister(PredR)) + Exec = (Cond == HII->isPredicatedTrue(*MI)) ? Exec_Then : Exec_Else; for (auto &Op : MI->operands()) { if (!Op.isReg()) @@ -1059,48 +1040,53 @@ bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) { // If the target register of the TfrI (RD) is not used or defined between // DefI and TfrI, consider moving TfrI up to DefI. 
bool CanUp = canMoveOver(TfrI, Defs, Uses); - bool CanDown = canMoveOver(DefI, Defs, Uses); + bool CanDown = canMoveOver(*DefI, Defs, Uses); // The TfrI does not access memory, but DefI could. Check if it's safe // to move DefI down to TfrI. if (DefI->mayLoad() || DefI->mayStore()) - if (!canMoveMemTo(DefI, TfrI, true)) + if (!canMoveMemTo(*DefI, TfrI, true)) CanDown = false; DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no") << ", can move down: " << (CanDown ? "yes\n" : "no\n")); MachineBasicBlock::iterator PastDefIt = std::next(DefIt); if (CanUp) - predicateAt(RD, DefI, PastDefIt, PredR, Cond); + predicateAt(MD, *DefI, PastDefIt, MP, Cond, UpdRegs); else if (CanDown) - predicateAt(RD, DefI, TfrIt, PredR, Cond); + predicateAt(MD, *DefI, TfrIt, MP, Cond, UpdRegs); else return false; - if (RT != RD) + if (RT != RD) { renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt); + UpdRegs.insert(RT.Reg); + } - // Delete the user of RT first (it should work either way, but this order - // of deleting is more natural). - removeInstrFromLiveness(TfrI); - removeInstrFromLiveness(DefI); + removeInstr(TfrI); + removeInstr(*DefI); return true; } /// Predicate all cases of conditional copies in the specified block. -bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) { +bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B, + std::set &UpdRegs) { bool Changed = false; MachineBasicBlock::iterator I, E, NextI; for (I = B.begin(), E = B.end(); I != E; I = NextI) { NextI = std::next(I); unsigned Opc = I->getOpcode(); if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) { - bool Done = predicate(I, (Opc == Hexagon::A2_tfrt)); + bool Done = predicate(*I, (Opc == Hexagon::A2_tfrt), UpdRegs); if (!Done) { // If we didn't predicate I, we may need to remove it in case it is // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1. 
- if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) - removeInstrFromLiveness(I); + if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) { + for (auto &Op : I->operands()) + if (Op.isReg()) + UpdRegs.insert(Op.getReg()); + removeInstr(*I); + } } Changed |= Done; } @@ -1109,51 +1095,6 @@ bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) { } -void HexagonExpandCondsets::removeImplicitUses(MachineInstr *MI) { - for (unsigned i = MI->getNumOperands(); i > 0; --i) { - MachineOperand &MO = MI->getOperand(i-1); - if (MO.isReg() && MO.isUse() && MO.isImplicit()) - MI->RemoveOperand(i-1); - } -} - - -void HexagonExpandCondsets::removeImplicitUses(MachineBasicBlock &B) { - for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { - MachineInstr *MI = &*I; - if (HII->isPredicated(MI)) - removeImplicitUses(MI); - } -} - - -void HexagonExpandCondsets::postprocessUndefImplicitUses(MachineBasicBlock &B) { - // Implicit uses that are "undef" are only meaningful (outside of the - // internals of this pass) when the instruction defines a subregister, - // and the implicit-undef use applies to the defined register. In such - // cases, the proper way to record the information in the IR is to mark - // the definition as "undef", which will be interpreted as "read-undef". 
- typedef SmallSet RegisterSet; - for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { - MachineInstr *MI = &*I; - RegisterSet Undefs; - for (unsigned i = MI->getNumOperands(); i > 0; --i) { - MachineOperand &MO = MI->getOperand(i-1); - if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.isUndef()) { - MI->RemoveOperand(i-1); - Undefs.insert(MO.getReg()); - } - } - for (auto &Op : MI->operands()) { - if (!Op.isReg() || !Op.isDef() || !Op.getSubReg()) - continue; - if (Undefs.count(Op.getReg())) - Op.setIsUndef(true); - } - } -} - - bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) return false; @@ -1236,7 +1177,7 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { while (L2.begin() != L2.end()) L2.removeSegment(*L2.begin()); - updateKillFlags(R1.Reg, L1); + updateKillFlags(R1.Reg); DEBUG(dbgs() << "coalesced: " << L1 << "\n"); L1.verify(); @@ -1253,7 +1194,7 @@ bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) { MachineBasicBlock &B = *I; for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) { MachineInstr *MI = &*J; - if (!isCondset(MI)) + if (!isCondset(*MI)) continue; MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3); if (!S1.isReg() && !S2.isReg()) @@ -1290,13 +1231,13 @@ bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) { if (S1.isReg()) { RegisterRef RS = S1; MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true); - if (!RDef || !HII->isPredicable(RDef)) + if (!RDef || !HII->isPredicable(*RDef)) Done = coalesceRegisters(RD, RegisterRef(S1)); } if (!Done && S2.isReg()) { RegisterRef RS = S2; MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false); - if (!RDef || !HII->isPredicable(RDef)) + if (!RDef || !HII->isPredicable(*RDef)) Done = coalesceRegisters(RD, RegisterRef(S2)); } Changed |= Done; @@ -1306,32 +1247,59 @@ bool 
HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) { bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + HII = static_cast(MF.getSubtarget().getInstrInfo()); TRI = MF.getSubtarget().getRegisterInfo(); + MDT = &getAnalysis(); LIS = &getAnalysis(); MRI = &MF.getRegInfo(); + LocalImpDefs.clear(); + + DEBUG(LIS->print(dbgs() << "Before expand-condsets\n", + MF.getFunction()->getParent())); bool Changed = false; + std::set SplitUpd, PredUpd; // Try to coalesce the target of a mux with one of its sources. // This could eliminate a register copy in some circumstances. Changed |= coalesceSegments(MF); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - // First, simply split all muxes into a pair of conditional transfers - // and update the live intervals to reflect the new arrangement. - // This is done mainly to make the live interval update simpler, than it - // would be while trying to predicate instructions at the same time. - Changed |= splitInBlock(*I); - // Traverse all blocks and collapse predicable instructions feeding - // conditional transfers into predicated instructions. - // Walk over all the instructions again, so we may catch pre-existing - // cases that were not created in the previous step. - Changed |= predicateInBlock(*I); - } + // First, simply split all muxes into a pair of conditional transfers + // and update the live intervals to reflect the new arrangement. The + // goal is to update the kill flags, since predication will rely on + // them. + for (auto &B : MF) + Changed |= splitInBlock(B, SplitUpd); + updateLiveness(SplitUpd, true, true, false); + + // Traverse all blocks and collapse predicable instructions feeding + // conditional transfers into predicated instructions. + // Walk over all the instructions again, so we may catch pre-existing + // cases that were not created in the previous step. 
+ for (auto &B : MF) + Changed |= predicateInBlock(B, PredUpd); + + updateLiveness(PredUpd, true, true, true); + // Remove from SplitUpd all registers contained in PredUpd to avoid + // unnecessary liveness recalculation. + std::set Diff; + std::set_difference(SplitUpd.begin(), SplitUpd.end(), + PredUpd.begin(), PredUpd.end(), + std::inserter(Diff, Diff.begin())); + updateLiveness(Diff, false, false, true); + + for (auto *ImpD : LocalImpDefs) + removeInstr(*ImpD); + + DEBUG({ + if (Changed) + LIS->print(dbgs() << "After expand-condsets\n", + MF.getFunction()->getParent()); + }); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - postprocessUndefImplicitUses(*I); return Changed; } @@ -1340,18 +1308,6 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { // Public Constructor Functions //===----------------------------------------------------------------------===// -static void initializePassOnce(PassRegistry &Registry) { - const char *Name = "Hexagon Expand Condsets"; - PassInfo *PI = new PassInfo(Name, "expand-condsets", - &HexagonExpandCondsets::ID, 0, false, false); - Registry.registerPass(*PI, true); -} - -void llvm::initializeHexagonExpandCondsetsPass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializePassOnce) -} - - FunctionPass *llvm::createHexagonExpandCondsets() { return new HexagonExpandCondsets(); } diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp deleted file mode 100644 index 6e2dbc06b124..000000000000 --- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp +++ /dev/null @@ -1,357 +0,0 @@ -//===-- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// The Hexagon processor has no instructions that load or store predicate -// registers directly. So, when these registers must be spilled a general -// purpose register must be found and the value copied to/from it from/to -// the predicate register. This code currently does not use the register -// scavenger mechanism available in the allocator. There are two registers -// reserved to allow spilling/restoring predicate registers. One is used to -// hold the predicate value. The other is used when stack frame offsets are -// too large. -// -//===----------------------------------------------------------------------===// - -#include "Hexagon.h" -#include "HexagonMachineFunctionInfo.h" -#include "HexagonSubtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LatencyPriorityQueue.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" - -using namespace llvm; - - -namespace llvm { - FunctionPass *createHexagonExpandPredSpillCode(); - void initializeHexagonExpandPredSpillCodePass(PassRegistry&); -} - - -namespace { - -class HexagonExpandPredSpillCode : public MachineFunctionPass { - public: - static char ID; - HexagonExpandPredSpillCode() : MachineFunctionPass(ID) { - PassRegistry &Registry = *PassRegistry::getPassRegistry(); - initializeHexagonExpandPredSpillCodePass(Registry); - } - - const char *getPassName() const override { - return 
"Hexagon Expand Predicate Spill Code"; - } - bool runOnMachineFunction(MachineFunction &Fn) override; -}; - - -char HexagonExpandPredSpillCode::ID = 0; - - -bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { - - const HexagonSubtarget &QST = Fn.getSubtarget(); - const HexagonInstrInfo *TII = QST.getInstrInfo(); - - // Loop over all of the basic blocks. - for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); - MBBb != MBBe; ++MBBb) { - MachineBasicBlock *MBB = &*MBBb; - // Traverse the basic block. - for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); - ++MII) { - MachineInstr *MI = MII; - int Opc = MI->getOpcode(); - if (Opc == Hexagon::S2_storerb_pci_pseudo || - Opc == Hexagon::S2_storerh_pci_pseudo || - Opc == Hexagon::S2_storeri_pci_pseudo || - Opc == Hexagon::S2_storerd_pci_pseudo || - Opc == Hexagon::S2_storerf_pci_pseudo) { - unsigned Opcode; - if (Opc == Hexagon::S2_storerd_pci_pseudo) - Opcode = Hexagon::S2_storerd_pci; - else if (Opc == Hexagon::S2_storeri_pci_pseudo) - Opcode = Hexagon::S2_storeri_pci; - else if (Opc == Hexagon::S2_storerh_pci_pseudo) - Opcode = Hexagon::S2_storerh_pci; - else if (Opc == Hexagon::S2_storerf_pci_pseudo) - Opcode = Hexagon::S2_storerf_pci; - else if (Opc == Hexagon::S2_storerb_pci_pseudo) - Opcode = Hexagon::S2_storerb_pci; - else - llvm_unreachable("wrong Opc"); - MachineOperand &Op0 = MI->getOperand(0); - MachineOperand &Op1 = MI->getOperand(1); - MachineOperand &Op2 = MI->getOperand(2); - MachineOperand &Op3 = MI->getOperand(3); // Modifier value. - MachineOperand &Op4 = MI->getOperand(4); - // Emit a "C6 = Rn, C6 is the control register for M0". - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), - Hexagon::C6)->addOperand(Op3); - // Replace the pseude circ_ldd by the real circ_ldd. 
- MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Opcode)); - NewMI->addOperand(Op0); - NewMI->addOperand(Op1); - NewMI->addOperand(Op4); - NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, - false, /*isDef*/ - false, /*isImpl*/ - true /*isKill*/)); - NewMI->addOperand(Op2); - MII = MBB->erase(MI); - --MII; - } else if (Opc == Hexagon::L2_loadrd_pci_pseudo || - Opc == Hexagon::L2_loadri_pci_pseudo || - Opc == Hexagon::L2_loadrh_pci_pseudo || - Opc == Hexagon::L2_loadruh_pci_pseudo|| - Opc == Hexagon::L2_loadrb_pci_pseudo || - Opc == Hexagon::L2_loadrub_pci_pseudo) { - unsigned Opcode; - if (Opc == Hexagon::L2_loadrd_pci_pseudo) - Opcode = Hexagon::L2_loadrd_pci; - else if (Opc == Hexagon::L2_loadri_pci_pseudo) - Opcode = Hexagon::L2_loadri_pci; - else if (Opc == Hexagon::L2_loadrh_pci_pseudo) - Opcode = Hexagon::L2_loadrh_pci; - else if (Opc == Hexagon::L2_loadruh_pci_pseudo) - Opcode = Hexagon::L2_loadruh_pci; - else if (Opc == Hexagon::L2_loadrb_pci_pseudo) - Opcode = Hexagon::L2_loadrb_pci; - else if (Opc == Hexagon::L2_loadrub_pci_pseudo) - Opcode = Hexagon::L2_loadrub_pci; - else - llvm_unreachable("wrong Opc"); - - MachineOperand &Op0 = MI->getOperand(0); - MachineOperand &Op1 = MI->getOperand(1); - MachineOperand &Op2 = MI->getOperand(2); - MachineOperand &Op4 = MI->getOperand(4); // Modifier value. - MachineOperand &Op5 = MI->getOperand(5); - // Emit a "C6 = Rn, C6 is the control register for M0". - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), - Hexagon::C6)->addOperand(Op4); - // Replace the pseude circ_ldd by the real circ_ldd. 
- MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Opcode)); - NewMI->addOperand(Op1); - NewMI->addOperand(Op0); - NewMI->addOperand(Op2); - NewMI->addOperand(Op5); - NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, - false, /*isDef*/ - false, /*isImpl*/ - true /*isKill*/)); - MII = MBB->erase(MI); - --MII; - } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo || - Opc == Hexagon::L2_loadri_pbr_pseudo || - Opc == Hexagon::L2_loadrh_pbr_pseudo || - Opc == Hexagon::L2_loadruh_pbr_pseudo|| - Opc == Hexagon::L2_loadrb_pbr_pseudo || - Opc == Hexagon::L2_loadrub_pbr_pseudo) { - unsigned Opcode; - if (Opc == Hexagon::L2_loadrd_pbr_pseudo) - Opcode = Hexagon::L2_loadrd_pbr; - else if (Opc == Hexagon::L2_loadri_pbr_pseudo) - Opcode = Hexagon::L2_loadri_pbr; - else if (Opc == Hexagon::L2_loadrh_pbr_pseudo) - Opcode = Hexagon::L2_loadrh_pbr; - else if (Opc == Hexagon::L2_loadruh_pbr_pseudo) - Opcode = Hexagon::L2_loadruh_pbr; - else if (Opc == Hexagon::L2_loadrb_pbr_pseudo) - Opcode = Hexagon::L2_loadrb_pbr; - else if (Opc == Hexagon::L2_loadrub_pbr_pseudo) - Opcode = Hexagon::L2_loadrub_pbr; - else - llvm_unreachable("wrong Opc"); - MachineOperand &Op0 = MI->getOperand(0); - MachineOperand &Op1 = MI->getOperand(1); - MachineOperand &Op2 = MI->getOperand(2); - MachineOperand &Op4 = MI->getOperand(4); // Modifier value. - // Emit a "C6 = Rn, C6 is the control register for M0". - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), - Hexagon::C6)->addOperand(Op4); - // Replace the pseudo brev_ldd by the real brev_ldd. 
- MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Opcode)); - NewMI->addOperand(Op1); - NewMI->addOperand(Op0); - NewMI->addOperand(Op2); - NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, - false, /*isDef*/ - false, /*isImpl*/ - true /*isKill*/)); - MII = MBB->erase(MI); - --MII; - } else if (Opc == Hexagon::S2_storerd_pbr_pseudo || - Opc == Hexagon::S2_storeri_pbr_pseudo || - Opc == Hexagon::S2_storerh_pbr_pseudo || - Opc == Hexagon::S2_storerb_pbr_pseudo || - Opc == Hexagon::S2_storerf_pbr_pseudo) { - unsigned Opcode; - if (Opc == Hexagon::S2_storerd_pbr_pseudo) - Opcode = Hexagon::S2_storerd_pbr; - else if (Opc == Hexagon::S2_storeri_pbr_pseudo) - Opcode = Hexagon::S2_storeri_pbr; - else if (Opc == Hexagon::S2_storerh_pbr_pseudo) - Opcode = Hexagon::S2_storerh_pbr; - else if (Opc == Hexagon::S2_storerf_pbr_pseudo) - Opcode = Hexagon::S2_storerf_pbr; - else if (Opc == Hexagon::S2_storerb_pbr_pseudo) - Opcode = Hexagon::S2_storerb_pbr; - else - llvm_unreachable("wrong Opc"); - MachineOperand &Op0 = MI->getOperand(0); - MachineOperand &Op1 = MI->getOperand(1); - MachineOperand &Op2 = MI->getOperand(2); - MachineOperand &Op3 = MI->getOperand(3); // Modifier value. - // Emit a "C6 = Rn, C6 is the control register for M0". - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr), - Hexagon::C6)->addOperand(Op3); - // Replace the pseudo brev_ldd by the real brev_ldd. 
- MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Opcode)); - NewMI->addOperand(Op0); - NewMI->addOperand(Op1); - NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0, - false, /*isDef*/ - false, /*isImpl*/ - true /*isKill*/)); - NewMI->addOperand(Op2); - MII = MBB->erase(MI); - --MII; - } else if (Opc == Hexagon::STriw_pred) { - // STriw_pred [R30], ofst, SrcReg; - unsigned FP = MI->getOperand(0).getReg(); - assert(FP == QST.getRegisterInfo()->getFrameRegister() && - "Not a Frame Pointer, Nor a Spill Slot"); - assert(MI->getOperand(1).isImm() && "Not an offset"); - int Offset = MI->getOperand(1).getImm(); - int SrcReg = MI->getOperand(2).getReg(); - assert(Hexagon::PredRegsRegClass.contains(SrcReg) && - "Not a predicate register"); - if (!TII->isValidOffset(Hexagon::S2_storeri_io, Offset)) { - if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) { - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::CONST32_Int_Real), - HEXAGON_RESERVED_REG_1).addImm(Offset); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add), - HEXAGON_RESERVED_REG_1) - .addReg(FP).addReg(HEXAGON_RESERVED_REG_1); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), - HEXAGON_RESERVED_REG_2).addReg(SrcReg); - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::S2_storeri_io)) - .addReg(HEXAGON_RESERVED_REG_1) - .addImm(0).addReg(HEXAGON_RESERVED_REG_2); - } else { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi), - HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), - HEXAGON_RESERVED_REG_2).addReg(SrcReg); - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::S2_storeri_io)) - .addReg(HEXAGON_RESERVED_REG_1) - .addImm(0) - .addReg(HEXAGON_RESERVED_REG_2); - } - } else { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrpr), - HEXAGON_RESERVED_REG_2).addReg(SrcReg); - BuildMI(*MBB, MII, MI->getDebugLoc(), - 
TII->get(Hexagon::S2_storeri_io)). - addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2); - } - MII = MBB->erase(MI); - --MII; - } else if (Opc == Hexagon::LDriw_pred) { - // DstReg = LDriw_pred [R30], ofst. - int DstReg = MI->getOperand(0).getReg(); - assert(Hexagon::PredRegsRegClass.contains(DstReg) && - "Not a predicate register"); - unsigned FP = MI->getOperand(1).getReg(); - assert(FP == QST.getRegisterInfo()->getFrameRegister() && - "Not a Frame Pointer, Nor a Spill Slot"); - assert(MI->getOperand(2).isImm() && "Not an offset"); - int Offset = MI->getOperand(2).getImm(); - if (!TII->isValidOffset(Hexagon::L2_loadri_io, Offset)) { - if (!TII->isValidOffset(Hexagon::A2_addi, Offset)) { - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::CONST32_Int_Real), - HEXAGON_RESERVED_REG_1).addImm(Offset); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add), - HEXAGON_RESERVED_REG_1) - .addReg(FP) - .addReg(HEXAGON_RESERVED_REG_1); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), - HEXAGON_RESERVED_REG_2) - .addReg(HEXAGON_RESERVED_REG_1) - .addImm(0); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), - DstReg).addReg(HEXAGON_RESERVED_REG_2); - } else { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_addi), - HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), - HEXAGON_RESERVED_REG_2) - .addReg(HEXAGON_RESERVED_REG_1) - .addImm(0); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), - DstReg).addReg(HEXAGON_RESERVED_REG_2); - } - } else { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::L2_loadri_io), - HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::C2_tfrrp), - DstReg).addReg(HEXAGON_RESERVED_REG_2); - } - MII = MBB->erase(MI); - --MII; - } - } - } - - return true; -} - -} - 
-//===----------------------------------------------------------------------===// -// Public Constructor Functions -//===----------------------------------------------------------------------===// - -static void initializePassOnce(PassRegistry &Registry) { - const char *Name = "Hexagon Expand Predicate Spill Code"; - PassInfo *PI = new PassInfo(Name, "hexagon-spill-pred", - &HexagonExpandPredSpillCode::ID, - nullptr, false, false); - Registry.registerPass(*PI, true); -} - -void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializePassOnce) -} - -FunctionPass* -llvm::createHexagonExpandPredSpillCode() { - return new HexagonExpandPredSpillCode(); -} diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index d0c7f9c8960f..3de817cc8fb6 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -45,6 +45,11 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + const char *getPassName() const override { return "Hexagon Hardware Loop Fixup"; } @@ -77,14 +82,16 @@ FunctionPass *llvm::createHexagonFixupHwLoops() { } /// \brief Returns true if the instruction is a hardware loop instruction. 
-static bool isHardwareLoop(const MachineInstr *MI) { - return MI->getOpcode() == Hexagon::J2_loop0r || - MI->getOpcode() == Hexagon::J2_loop0i || - MI->getOpcode() == Hexagon::J2_loop1r || - MI->getOpcode() == Hexagon::J2_loop1i; +static bool isHardwareLoop(const MachineInstr &MI) { + return MI.getOpcode() == Hexagon::J2_loop0r || + MI.getOpcode() == Hexagon::J2_loop0i || + MI.getOpcode() == Hexagon::J2_loop1r || + MI.getOpcode() == Hexagon::J2_loop1i; } bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; return fixupLoopInstrs(MF); } @@ -123,7 +130,6 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { // Second pass - check each loop instruction to see if it needs to be // converted. - InstOffset = 0; bool Changed = false; for (MachineBasicBlock &MBB : MF) { InstOffset = BlockToInstOffset[&MBB]; @@ -137,7 +143,7 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { ++MII; continue; } - if (isHardwareLoop(MII)) { + if (isHardwareLoop(*MII)) { assert(MII->getOperand(0).isMBB() && "Expect a basic block as loop operand"); int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()]; diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 7a52a1c9eaec..25402147bf53 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -10,8 +10,8 @@ #define DEBUG_TYPE "hexagon-pei" +#include "HexagonBlockRanges.h" #include "HexagonFrameLowering.h" -#include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" @@ -19,12 +19,11 @@ #include "HexagonTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include 
"llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -126,8 +125,7 @@ using namespace llvm; static cl::opt DisableDeallocRet("disable-hexagon-dealloc-ret", cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target")); - -static cl::opt NumberScavengerSlots("number-scavenger-slots", +static cl::opt NumberScavengerSlots("number-scavenger-slots", cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2), cl::ZeroOrMore); @@ -139,6 +137,10 @@ static cl::opt SpillFuncThresholdOs("spill-func-threshold-Os", cl::Hidden, cl::desc("Specify Os spill func threshold"), cl::init(1), cl::ZeroOrMore); +static cl::opt EnableStackOVFSanitizer("enable-stackovf-sanitizer", + cl::Hidden, cl::desc("Enable runtime checks for stack overflow."), + cl::init(false), cl::ZeroOrMore); + static cl::opt EnableShrinkWrapping("hexagon-shrink-frame", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable stack frame shrink wrapping")); @@ -150,6 +152,9 @@ static cl::opt ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX), static cl::opt UseAllocframe("use-allocframe", cl::init(true), cl::Hidden, cl::desc("Use allocframe more conservatively")); +static cl::opt OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden, + cl::init(true), cl::desc("Optimize spill slots")); + namespace llvm { void initializeHexagonCallFrameInformationPass(PassRegistry&); @@ -165,6 +170,10 @@ namespace { initializeHexagonCallFrameInformationPass(PR); } bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } }; char HexagonCallFrameInformation::ID = 0; @@ -213,8 +222,8 @@ namespace { /// Returns the callee saved 
register with the largest id in the vector. unsigned getMaxCalleeSavedReg(const std::vector &CSI, const TargetRegisterInfo &TRI) { - assert(Hexagon::R1 > 0 && - "Assume physical registers are encoded as positive integers"); + static_assert(Hexagon::R1 > 0, + "Assume physical registers are encoded as positive integers"); if (CSI.empty()) return 0; @@ -229,7 +238,8 @@ namespace { /// Checks if the basic block contains any instruction that needs a stack /// frame to be already in place. - bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR) { + bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR, + const HexagonRegisterInfo &HRI) { for (auto &I : MBB) { const MachineInstr *MI = &I; if (MI->isCall()) @@ -258,8 +268,9 @@ namespace { // a stack slot. if (TargetRegisterInfo::isVirtualRegister(R)) return true; - if (CSR[R]) - return true; + for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S) + if (CSR[*S]) + return true; } } return false; @@ -280,6 +291,40 @@ namespace { return true; return false; } + + /// Returns the "return" instruction from this block, or nullptr if there + /// isn't any. 
+ MachineInstr *getReturn(MachineBasicBlock &MBB) { + for (auto &I : MBB) + if (I.isReturn()) + return &I; + return nullptr; + } + + bool isRestoreCall(unsigned Opc) { + switch (Opc) { + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC: + case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4: + case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC: + return true; + } + return false; + } + + inline bool isOptNone(const MachineFunction &MF) { + return MF.getFunction()->hasFnAttribute(Attribute::OptimizeNone) || + MF.getTarget().getOptLevel() == CodeGenOpt::None; + } + + inline bool isOptSize(const MachineFunction &MF) { + const Function &F = *MF.getFunction(); + return F.optForSize() && !F.optForMinSize(); + } + + inline bool isMinSize(const MachineFunction &MF) { + return MF.getFunction()->optForMinSize(); + } } @@ -330,10 +375,11 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, SmallVector SFBlocks; BitVector CSR(Hexagon::NUM_TARGET_REGS); for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P) - CSR[*P] = true; + for (MCSubRegIterator S(*P, &HRI, true); S.isValid(); ++S) + CSR[*S] = true; for (auto &I : MF) - if (needsStackFrame(I, CSR)) + if (needsStackFrame(I, CSR, HRI)) SFBlocks.push_back(&I); DEBUG({ @@ -386,6 +432,7 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, EpilogB = PDomB; } + /// Perform most of the PEI work here: /// - saving/restoring of the callee-saved registers, /// - stack frame creation and destruction. 
@@ -396,7 +443,6 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF, auto &HST = static_cast(MF.getSubtarget()); auto &HRI = *HST.getRegisterInfo(); - assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFI = MF.getFrameInfo(); const std::vector &CSI = MFI->getCalleeSavedInfo(); @@ -404,8 +450,9 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF, if (EnableShrinkWrapping) findShrunkPrologEpilog(MF, PrologB, EpilogB); - insertCSRSpillsInBlock(*PrologB, CSI, HRI); - insertPrologueInBlock(*PrologB); + bool PrologueStubs = false; + insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs); + insertPrologueInBlock(*PrologB, PrologueStubs); if (EpilogB) { insertCSRRestoresInBlock(*EpilogB, CSI, HRI); @@ -418,11 +465,34 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF, for (auto &B : MF) if (B.isReturnBlock()) insertEpilogueInBlock(B); + + for (auto &B : MF) { + if (B.empty()) + continue; + MachineInstr *RetI = getReturn(B); + if (!RetI || isRestoreCall(RetI->getOpcode())) + continue; + for (auto &R : CSI) + RetI->addOperand(MachineOperand::CreateReg(R.getReg(), false, true)); + } + } + + if (EpilogB) { + // If there is an epilog block, it may not have a return instruction. + // In such case, we need to add the callee-saved registers as live-ins + // in all blocks on all paths from the epilog to any return block. 
+ unsigned MaxBN = 0; + for (auto &B : MF) + if (B.getNumber() >= 0) + MaxBN = std::max(MaxBN, unsigned(B.getNumber())); + BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1); + updateExitPaths(*EpilogB, EpilogB, DoneT, DoneF, Path); } } -void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { +void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB, + bool PrologueStubs) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); auto &HST = MF.getSubtarget(); @@ -436,10 +506,10 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { // Get the number of bytes to allocate from the FrameInfo. unsigned FrameSize = MFI->getStackSize(); // Round up the max call frame size to the max alignment on the stack. - unsigned MaxCFA = RoundUpToAlignment(MFI->getMaxCallFrameSize(), MaxAlign); + unsigned MaxCFA = alignTo(MFI->getMaxCallFrameSize(), MaxAlign); MFI->setMaxCallFrameSize(MaxCFA); - FrameSize = MaxCFA + RoundUpToAlignment(FrameSize, MaxAlign); + FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign); MFI->setStackSize(FrameSize); bool AlignStack = (MaxAlign > getStackAlignment()); @@ -497,6 +567,13 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { .addReg(SP) .addImm(-int64_t(MaxAlign)); } + + // If the stack-checking is enabled, and we spilled the callee-saved + // registers inline (i.e. did not use a spill function), then call + // the stack checker directly. 
+ if (EnableStackOVFSanitizer && !PrologueStubs) + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CALLstk)) + .addExternalSymbol("__runtime_stack_check"); } void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { @@ -509,13 +586,7 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { auto &HRI = *HST.getRegisterInfo(); unsigned SP = HRI.getStackRegister(); - MachineInstr *RetI = nullptr; - for (auto &I : MBB) { - if (!I.isReturn()) - continue; - RetI = &I; - break; - } + MachineInstr *RetI = getReturn(MBB); unsigned RetOpc = RetI ? RetI->getOpcode() : 0; MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator(); @@ -536,7 +607,8 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc- // frame instruction if we encounter it. - if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) { + if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 || + RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC) { MachineBasicBlock::iterator It = RetI; ++It; // Delete all instructions after the RESTORE (except labels). @@ -556,7 +628,8 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { if (!MBB.empty() && InsertPt != MBB.begin()) { MachineBasicBlock::iterator PrevIt = std::prev(InsertPt); unsigned COpc = PrevIt->getOpcode(); - if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4) + if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 || + COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC) NeedsDeallocframe = false; } @@ -572,11 +645,56 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { unsigned NewOpc = Hexagon::L4_return; MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc)); // Transfer the function live-out registers. 
- NewI->copyImplicitOps(MF, RetI); + NewI->copyImplicitOps(MF, *RetI); MBB.erase(RetI); } +bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB, + MachineBasicBlock *RestoreB, BitVector &DoneT, BitVector &DoneF, + BitVector &Path) const { + assert(MBB.getNumber() >= 0); + unsigned BN = MBB.getNumber(); + if (Path[BN] || DoneF[BN]) + return false; + if (DoneT[BN]) + return true; + + auto &CSI = MBB.getParent()->getFrameInfo()->getCalleeSavedInfo(); + + Path[BN] = true; + bool ReachedExit = false; + for (auto &SB : MBB.successors()) + ReachedExit |= updateExitPaths(*SB, RestoreB, DoneT, DoneF, Path); + + if (!MBB.empty() && MBB.back().isReturn()) { + // Add implicit uses of all callee-saved registers to the reached + // return instructions. This is to prevent the anti-dependency breaker + // from renaming these registers. + MachineInstr &RetI = MBB.back(); + if (!isRestoreCall(RetI.getOpcode())) + for (auto &R : CSI) + RetI.addOperand(MachineOperand::CreateReg(R.getReg(), false, true)); + ReachedExit = true; + } + + // We don't want to add unnecessary live-ins to the restore block: since + // the callee-saved registers are being defined in it, the entry of the + // restore block cannot be on the path from the definitions to any exit. 
+ if (ReachedExit && &MBB != RestoreB) { + for (auto &R : CSI) + if (!MBB.isLiveIn(R.getReg())) + MBB.addLiveIn(R.getReg()); + DoneT[BN] = true; + } + if (!ReachedExit) + DoneF[BN] = true; + + Path[BN] = false; + return ReachedExit; +} + + namespace { bool IsAllocFrame(MachineBasicBlock::const_iterator It) { if (!It->isBundle()) @@ -611,7 +729,7 @@ void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator At) const { MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); auto &HST = MF.getSubtarget(); auto &HII = *HST.getInstrInfo(); @@ -624,8 +742,9 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION); MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); + bool HasFP = hasFP(MF); - if (hasFP(MF)) { + if (HasFP) { unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true); unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true); @@ -663,7 +782,7 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, Hexagon::NoRegister }; - const std::vector &CSI = MFI->getCalleeSavedInfo(); + const std::vector &CSI = MFI.getCalleeSavedInfo(); for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) { unsigned Reg = RegsToMove[i]; @@ -674,9 +793,22 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, if (F == CSI.end()) continue; + int64_t Offset; + if (HasFP) { + // If the function has a frame pointer (i.e. has an allocframe), + // then the CFA has been defined in terms of FP. Any offsets in + // the following CFI instructions have to be defined relative + // to FP, which points to the bottom of the stack frame. 
+ // The function getFrameIndexReference can still choose to use SP + // for the offset calculation, so we cannot simply call it here. + // Instead, get the offset (relative to the FP) directly. + Offset = MFI.getObjectOffset(F->getFrameIdx()); + } else { + unsigned FrameReg; + Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg); + } // Subtract 8 to make room for R30 and R31, which are added above. - unsigned FrameReg; - int64_t Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg) - 8; + Offset -= 8; if (Reg < Hexagon::D0 || Reg > Hexagon::D15) { unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true); @@ -734,7 +866,7 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { return true; if (MFI.getStackSize() > 0) { - if (UseAllocframe) + if (EnableStackOVFSanitizer || UseAllocframe) return true; } @@ -752,8 +884,8 @@ enum SpillKind { SK_FromMemTailcall }; -static const char * -getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType) { +static const char *getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType, + bool Stkchk = false) { const char * V4SpillToMemoryFunctions[] = { "__save_r16_through_r17", "__save_r16_through_r19", @@ -762,6 +894,14 @@ getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType) { "__save_r16_through_r25", "__save_r16_through_r27" }; + const char * V4SpillToMemoryStkchkFunctions[] = { + "__save_r16_through_r17_stkchk", + "__save_r16_through_r19_stkchk", + "__save_r16_through_r21_stkchk", + "__save_r16_through_r23_stkchk", + "__save_r16_through_r25_stkchk", + "__save_r16_through_r27_stkchk" }; + const char * V4SpillFromMemoryFunctions[] = { "__restore_r16_through_r17_and_deallocframe", "__restore_r16_through_r19_and_deallocframe", @@ -783,7 +923,8 @@ getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType) { switch(SpillType) { case SK_ToMem: - SpillFunc = V4SpillToMemoryFunctions; + SpillFunc = Stkchk ? 
V4SpillToMemoryStkchkFunctions + : V4SpillToMemoryFunctions; break; case SK_FromMem: SpillFunc = V4SpillFromMemoryFunctions; @@ -814,32 +955,20 @@ getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType) { return 0; } -/// Adds all callee-saved registers up to MaxReg to the instruction. -static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst, - unsigned MaxReg, bool IsDef) { - // Add the callee-saved registers as implicit uses. - for (unsigned R = Hexagon::R16; R <= MaxReg; ++R) { - MachineOperand ImpUse = MachineOperand::CreateReg(R, IsDef, true); - Inst->addOperand(ImpUse); - } -} - int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { auto &MFI = *MF.getFrameInfo(); auto &HRI = *MF.getSubtarget().getRegisterInfo(); - // Large parts of this code are shared with HRI::eliminateFrameIndex. int Offset = MFI.getObjectOffset(FI); bool HasAlloca = MFI.hasVarSizedObjects(); bool HasExtraAlign = HRI.needsStackRealignment(MF); bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None; unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister(); - unsigned AP = 0; - if (const MachineInstr *AI = getAlignaInstr(MF)) - AP = AI->getOperand(0).getReg(); + auto &HMFI = *MF.getInfo(); + unsigned AP = HMFI.getStackAlignBasePhysReg(); unsigned FrameSize = MFI.getStackSize(); bool UseFP = false, UseAP = false; // Default: use SP (except at -O0). 
@@ -912,24 +1041,40 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, - const CSIVect &CSI, const HexagonRegisterInfo &HRI) const { + const CSIVect &CSI, const HexagonRegisterInfo &HRI, + bool &PrologueStubs) const { if (CSI.empty()) return true; MachineBasicBlock::iterator MI = MBB.begin(); + PrologueStubs = false; MachineFunction &MF = *MBB.getParent(); auto &HII = *MF.getSubtarget().getInstrInfo(); if (useSpillFunction(MF, CSI)) { + PrologueStubs = true; unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI); - const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem); + bool StkOvrFlowEnabled = EnableStackOVFSanitizer; + const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem, + StkOvrFlowEnabled); + auto &HTM = static_cast(MF.getTarget()); + bool IsPIC = HTM.isPositionIndependent(); + // Call spill function. DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); + unsigned SpillOpc; + if (StkOvrFlowEnabled) + SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC + : Hexagon::SAVE_REGISTERS_CALL_V4STK; + else + SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_PIC + : Hexagon::SAVE_REGISTERS_CALL_V4; + MachineInstr *SaveRegsCall = - BuildMI(MBB, MI, DL, HII.get(Hexagon::SAVE_REGISTERS_CALL_V4)) + BuildMI(MBB, MI, DL, HII.get(SpillOpc)) .addExternalSymbol(SpillFun); // Add callee-saved registers as use. - addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false); + addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true); // Add live in registers. for (unsigned I = 0; I < CSI.size(); ++I) MBB.addLiveIn(CSI[I].getReg()); @@ -966,6 +1111,8 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI); SpillKind Kind = HasTC ? 
SK_FromMemTailcall : SK_FromMem; const char *RestoreFn = getSpillFunctionFor(MaxR, Kind); + auto &HTM = static_cast(MF.getTarget()); + bool IsPIC = HTM.isPositionIndependent(); // Call spill function. DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() @@ -973,20 +1120,22 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, MachineInstr *DeallocCall = nullptr; if (HasTC) { - unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4; + unsigned ROpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC + : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4; DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc)) .addExternalSymbol(RestoreFn); } else { // The block has a return. MachineBasicBlock::iterator It = MBB.getFirstTerminator(); assert(It->isReturn() && std::next(It) == MBB.end()); - unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4; + unsigned ROpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC + : Hexagon::RESTORE_DEALLOC_RET_JMP_V4; DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc)) .addExternalSymbol(RestoreFn); // Transfer the function live-out registers. - DeallocCall->copyImplicitOps(MF, It); + DeallocCall->copyImplicitOps(MF, *It); } - addCalleeSaveRegistersAsImpOperand(DeallocCall, MaxR, true); + addCalleeSaveRegistersAsImpOperand(DeallocCall, CSI, true, false); return true; } @@ -996,18 +1145,19 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, int FI = CSI[i].getFrameIdx(); HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); } + return true; } - -void HexagonFrameLowering::eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { +MachineBasicBlock::iterator HexagonFrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { MachineInstr &MI = *I; unsigned Opc = MI.getOpcode(); (void)Opc; // Silence compiler warning. 
assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) && "Cannot handle this call frame pseudo instruction"); - MBB.erase(I); + return MBB.erase(I); } @@ -1025,14 +1175,16 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized( return; unsigned LFS = MFI->getLocalFrameSize(); - int Offset = -LFS; for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i)) continue; - int S = MFI->getObjectSize(i); - LFS += S; - Offset -= S; - MFI->mapLocalFrameObject(i, Offset); + unsigned S = MFI->getObjectSize(i); + // Reduce the alignment to at most 8. This will require unaligned vector + // stores if they happen here. + unsigned A = std::max(MFI->getObjectAlignment(i), 8U); + MFI->setObjectAlignment(i, 8); + LFS = alignTo(LFS+S, A); + MFI->mapLocalFrameObject(i, -LFS); } MFI->setLocalFrameSize(LFS); @@ -1041,142 +1193,35 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized( if (A == 0) MFI->setLocalFrameMaxAlign(8); MFI->setUseLocalStackAllocationBlock(true); + + // Set the physical aligned-stack base address register. + unsigned AP = 0; + if (const MachineInstr *AI = getAlignaInstr(MF)) + AP = AI->getOperand(0).getReg(); + auto &HMFI = *MF.getInfo(); + HMFI.setStackAlignBasePhysReg(AP); } -/// Returns true if there is no caller saved registers available. +/// Returns true if there are no caller-saved registers available in class RC. static bool needToReserveScavengingSpillSlots(MachineFunction &MF, - const HexagonRegisterInfo &HRI) { + const HexagonRegisterInfo &HRI, const TargetRegisterClass *RC) { MachineRegisterInfo &MRI = MF.getRegInfo(); - const MCPhysReg *CallerSavedRegs = HRI.getCallerSavedRegs(&MF); - // Check for an unused caller-saved register. - for ( ; *CallerSavedRegs; ++CallerSavedRegs) { - MCPhysReg FreeReg = *CallerSavedRegs; - if (!MRI.reg_nodbg_empty(FreeReg)) - continue; - - // Check aliased register usage. 
- bool IsCurrentRegUsed = false; - for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI) - if (!MRI.reg_nodbg_empty(*AI)) { - IsCurrentRegUsed = true; - break; - } - if (IsCurrentRegUsed) - continue; - // Neither directly used nor used through an aliased register. + auto IsUsed = [&HRI,&MRI] (unsigned Reg) -> bool { + for (MCRegAliasIterator AI(Reg, &HRI, true); AI.isValid(); ++AI) + if (MRI.isPhysRegUsed(*AI)) + return true; return false; - } - // All caller-saved registers are used. - return true; -} - - -/// Replaces the predicate spill code pseudo instructions by valid instructions. -bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF) - const { - auto &HST = static_cast(MF.getSubtarget()); - auto &HII = *HST.getInstrInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - bool HasReplacedPseudoInst = false; - // Replace predicate spill pseudo instructions by real code. - // Loop over all of the basic blocks. - for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); - MBBb != MBBe; ++MBBb) { - MachineBasicBlock *MBB = &*MBBb; - // Traverse the basic block. - MachineBasicBlock::iterator NextII; - for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); - MII = NextII) { - MachineInstr *MI = MII; - NextII = std::next(MII); - int Opc = MI->getOpcode(); - if (Opc == Hexagon::STriw_pred) { - HasReplacedPseudoInst = true; - // STriw_pred FI, 0, SrcReg; - unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - unsigned SrcReg = MI->getOperand(2).getReg(); - bool IsOrigSrcRegKilled = MI->getOperand(2).isKill(); - - assert(MI->getOperand(0).isFI() && "Expect a frame index"); - assert(Hexagon::PredRegsRegClass.contains(SrcReg) && - "Not a predicate register"); - - // Insert transfer to general purpose register. 
- // VirtReg = C2_tfrpr SrcPredReg - BuildMI(*MBB, MII, MI->getDebugLoc(), HII.get(Hexagon::C2_tfrpr), - VirtReg).addReg(SrcReg, getKillRegState(IsOrigSrcRegKilled)); - - // Change instruction to S2_storeri_io. - // S2_storeri_io FI, 0, VirtReg - MI->setDesc(HII.get(Hexagon::S2_storeri_io)); - MI->getOperand(2).setReg(VirtReg); - MI->getOperand(2).setIsKill(); - - } else if (Opc == Hexagon::LDriw_pred) { - // DstReg = LDriw_pred FI, 0 - MachineOperand &M0 = MI->getOperand(0); - if (M0.isDead()) { - MBB->erase(MII); - continue; - } - - unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - unsigned DestReg = MI->getOperand(0).getReg(); - - assert(MI->getOperand(1).isFI() && "Expect a frame index"); - assert(Hexagon::PredRegsRegClass.contains(DestReg) && - "Not a predicate register"); - - // Change instruction to L2_loadri_io. - // VirtReg = L2_loadri_io FI, 0 - MI->setDesc(HII.get(Hexagon::L2_loadri_io)); - MI->getOperand(0).setReg(VirtReg); - - // Insert transfer to general purpose register. - // DestReg = C2_tfrrp VirtReg - const MCInstrDesc &D = HII.get(Hexagon::C2_tfrrp); - BuildMI(*MBB, std::next(MII), MI->getDebugLoc(), D, DestReg) - .addReg(VirtReg, getKillRegState(true)); - HasReplacedPseudoInst = true; - } - } - } - return HasReplacedPseudoInst; -} - - -void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, - BitVector &SavedRegs, - RegScavenger *RS) const { - TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); - - auto &HST = static_cast(MF.getSubtarget()); - auto &HRI = *HST.getRegisterInfo(); - - bool HasEHReturn = MF.getInfo()->hasEHReturn(); - - // If we have a function containing __builtin_eh_return we want to spill and - // restore all callee saved registers. Pretend that they are used. 
- if (HasEHReturn) { - for (const MCPhysReg *CSRegs = HRI.getCalleeSavedRegs(&MF); *CSRegs; - ++CSRegs) - SavedRegs.set(*CSRegs); - } + }; - const TargetRegisterClass &RC = Hexagon::IntRegsRegClass; + // Check for an unused caller-saved register. Callee-saved registers + // have become pristine by now. + for (const MCPhysReg *P = HRI.getCallerSavedRegs(&MF, RC); *P; ++P) + if (!IsUsed(*P)) + return false; - // Replace predicate register pseudo spill code. - bool HasReplacedPseudoInst = replacePredRegPseudoSpillCode(MF); - - // We need to reserve a a spill slot if scavenging could potentially require - // spilling a scavenged register. - if (HasReplacedPseudoInst && needToReserveScavengingSpillSlots(MF, HRI)) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - for (int i=0; i < NumberScavengerSlots; i++) - RS->addScavengingFrameIndex( - MFI->CreateSpillStackObject(RC.getSize(), RC.getAlignment())); - } + // All caller-saved registers are used. + return true; } @@ -1327,6 +1372,811 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, } +bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B, + MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, + const HexagonInstrInfo &HII, SmallVectorImpl &NewRegs) const { + MachineInstr *MI = &*It; + DebugLoc DL = MI->getDebugLoc(); + unsigned DstR = MI->getOperand(0).getReg(); + unsigned SrcR = MI->getOperand(1).getReg(); + if (!Hexagon::ModRegsRegClass.contains(DstR) || + !Hexagon::ModRegsRegClass.contains(SrcR)) + return false; + + unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR) + .addOperand(MI->getOperand(1)); + BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR) + .addReg(TmpR, RegState::Kill); + + NewRegs.push_back(TmpR); + B.erase(It); + return true; +} + +bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B, + MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, + const HexagonInstrInfo &HII, 
SmallVectorImpl &NewRegs) const { + MachineInstr *MI = &*It; + DebugLoc DL = MI->getDebugLoc(); + unsigned Opc = MI->getOpcode(); + unsigned SrcR = MI->getOperand(2).getReg(); + bool IsKill = MI->getOperand(2).isKill(); + + assert(MI->getOperand(0).isFI() && "Expect a frame index"); + int FI = MI->getOperand(0).getIndex(); + + // TmpR = C2_tfrpr SrcR if SrcR is a predicate register + // TmpR = A2_tfrcrr SrcR if SrcR is a modifier register + unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? Hexagon::C2_tfrpr + : Hexagon::A2_tfrcrr; + BuildMI(B, It, DL, HII.get(TfrOpc), TmpR) + .addReg(SrcR, getKillRegState(IsKill)); + + // S2_storeri_io FI, 0, TmpR + BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io)) + .addFrameIndex(FI) + .addImm(0) + .addReg(TmpR, RegState::Kill) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + NewRegs.push_back(TmpR); + B.erase(It); + return true; +} + +bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B, + MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, + const HexagonInstrInfo &HII, SmallVectorImpl &NewRegs) const { + MachineInstr *MI = &*It; + DebugLoc DL = MI->getDebugLoc(); + unsigned Opc = MI->getOpcode(); + unsigned DstR = MI->getOperand(0).getReg(); + + assert(MI->getOperand(1).isFI() && "Expect a frame index"); + int FI = MI->getOperand(1).getIndex(); + + // TmpR = L2_loadri_io FI, 0 + unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR) + .addFrameIndex(FI) + .addImm(0) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + // DstR = C2_tfrrp TmpR if DstR is a predicate register + // DstR = A2_tfrrcr TmpR if DstR is a modifier register + unsigned TfrOpc = (Opc == Hexagon::LDriw_pred) ? 
Hexagon::C2_tfrrp + : Hexagon::A2_tfrrcr; + BuildMI(B, It, DL, HII.get(TfrOpc), DstR) + .addReg(TmpR, RegState::Kill); + + NewRegs.push_back(TmpR); + B.erase(It); + return true; +} + + +bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B, + MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, + const HexagonInstrInfo &HII, SmallVectorImpl &NewRegs) const { + auto &HST = B.getParent()->getSubtarget(); + MachineInstr *MI = &*It; + DebugLoc DL = MI->getDebugLoc(); + unsigned SrcR = MI->getOperand(2).getReg(); + bool IsKill = MI->getOperand(2).isKill(); + + assert(MI->getOperand(0).isFI() && "Expect a frame index"); + int FI = MI->getOperand(0).getIndex(); + + bool Is128B = HST.useHVXDblOps(); + auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass + : &Hexagon::VectorRegs128BRegClass; + + // Insert transfer to general vector register. + // TmpR0 = A2_tfrsi 0x01010101 + // TmpR1 = V6_vandqrt Qx, TmpR0 + // store FI, 0, TmpR1 + unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + unsigned TmpR1 = MRI.createVirtualRegister(RC); + + BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0) + .addImm(0x01010101); + + unsigned VandOpc = !Is128B ? 
Hexagon::V6_vandqrt : Hexagon::V6_vandqrt_128B; + BuildMI(B, It, DL, HII.get(VandOpc), TmpR1) + .addReg(SrcR, getKillRegState(IsKill)) + .addReg(TmpR0, RegState::Kill); + + auto *HRI = B.getParent()->getSubtarget().getRegisterInfo(); + HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI); + expandStoreVec(B, std::prev(It), MRI, HII, NewRegs); + + NewRegs.push_back(TmpR0); + NewRegs.push_back(TmpR1); + B.erase(It); + return true; +} + +bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B, + MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, + const HexagonInstrInfo &HII, SmallVectorImpl &NewRegs) const { + auto &HST = B.getParent()->getSubtarget(); + MachineInstr *MI = &*It; + DebugLoc DL = MI->getDebugLoc(); + unsigned DstR = MI->getOperand(0).getReg(); + + assert(MI->getOperand(1).isFI() && "Expect a frame index"); + int FI = MI->getOperand(1).getIndex(); + + bool Is128B = HST.useHVXDblOps(); + auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass + : &Hexagon::VectorRegs128BRegClass; + + // TmpR0 = A2_tfrsi 0x01010101 + // TmpR1 = load FI, 0 + // DstR = V6_vandvrt TmpR1, TmpR0 + unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + unsigned TmpR1 = MRI.createVirtualRegister(RC); + + BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0) + .addImm(0x01010101); + auto *HRI = B.getParent()->getSubtarget().getRegisterInfo(); + HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI); + expandLoadVec(B, std::prev(It), MRI, HII, NewRegs); + + unsigned VandOpc = !Is128B ? 
Hexagon::V6_vandvrt : Hexagon::V6_vandvrt_128B; + BuildMI(B, It, DL, HII.get(VandOpc), DstR) + .addReg(TmpR1, RegState::Kill) + .addReg(TmpR0, RegState::Kill); + + NewRegs.push_back(TmpR0); + NewRegs.push_back(TmpR1); + B.erase(It); + return true; +} + +bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, + MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, + const HexagonInstrInfo &HII, SmallVectorImpl &NewRegs) const { + MachineFunction &MF = *B.getParent(); + auto &HST = MF.getSubtarget(); + auto &MFI = *MF.getFrameInfo(); + auto &HRI = *MF.getSubtarget().getRegisterInfo(); + MachineInstr *MI = &*It; + DebugLoc DL = MI->getDebugLoc(); + + unsigned SrcR = MI->getOperand(2).getReg(); + unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::subreg_loreg); + unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::subreg_hireg); + bool IsKill = MI->getOperand(2).isKill(); + + assert(MI->getOperand(0).isFI() && "Expect a frame index"); + int FI = MI->getOperand(0).getIndex(); + + bool Is128B = HST.useHVXDblOps(); + auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass + : &Hexagon::VectorRegs128BRegClass; + unsigned Size = RC->getSize(); + unsigned NeedAlign = RC->getAlignment(); + unsigned HasAlign = MFI.getObjectAlignment(FI); + unsigned StoreOpc; + + // Store low part. + if (NeedAlign <= HasAlign) + StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32b_ai_128B; + else + StoreOpc = !Is128B ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B; + + BuildMI(B, It, DL, HII.get(StoreOpc)) + .addFrameIndex(FI) + .addImm(0) + .addReg(SrcLo, getKillRegState(IsKill)) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + // Load high part. + if (NeedAlign <= MinAlign(HasAlign, Size)) + StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32b_ai_128B; + else + StoreOpc = !Is128B ? 
Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B; + + BuildMI(B, It, DL, HII.get(StoreOpc)) + .addFrameIndex(FI) + .addImm(Size) + .addReg(SrcHi, getKillRegState(IsKill)) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + B.erase(It); + return true; +} + +bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B, + MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, + const HexagonInstrInfo &HII, SmallVectorImpl &NewRegs) const { + MachineFunction &MF = *B.getParent(); + auto &HST = MF.getSubtarget(); + auto &MFI = *MF.getFrameInfo(); + auto &HRI = *MF.getSubtarget().getRegisterInfo(); + MachineInstr *MI = &*It; + DebugLoc DL = MI->getDebugLoc(); + + unsigned DstR = MI->getOperand(0).getReg(); + unsigned DstHi = HRI.getSubReg(DstR, Hexagon::subreg_hireg); + unsigned DstLo = HRI.getSubReg(DstR, Hexagon::subreg_loreg); + + assert(MI->getOperand(1).isFI() && "Expect a frame index"); + int FI = MI->getOperand(1).getIndex(); + + bool Is128B = HST.useHVXDblOps(); + auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass + : &Hexagon::VectorRegs128BRegClass; + unsigned Size = RC->getSize(); + unsigned NeedAlign = RC->getAlignment(); + unsigned HasAlign = MFI.getObjectAlignment(FI); + unsigned LoadOpc; + + // Load low part. + if (NeedAlign <= HasAlign) + LoadOpc = !Is128B ? Hexagon::V6_vL32b_ai : Hexagon::V6_vL32b_ai_128B; + else + LoadOpc = !Is128B ? Hexagon::V6_vL32Ub_ai : Hexagon::V6_vL32Ub_ai_128B; + + BuildMI(B, It, DL, HII.get(LoadOpc), DstLo) + .addFrameIndex(FI) + .addImm(0) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + // Load high part. + if (NeedAlign <= MinAlign(HasAlign, Size)) + LoadOpc = !Is128B ? Hexagon::V6_vL32b_ai : Hexagon::V6_vL32b_ai_128B; + else + LoadOpc = !Is128B ? 
Hexagon::V6_vL32Ub_ai : Hexagon::V6_vL32Ub_ai_128B; + + BuildMI(B, It, DL, HII.get(LoadOpc), DstHi) + .addFrameIndex(FI) + .addImm(Size) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + B.erase(It); + return true; +} + +bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B, + MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, + const HexagonInstrInfo &HII, SmallVectorImpl &NewRegs) const { + MachineFunction &MF = *B.getParent(); + auto &HST = MF.getSubtarget(); + auto &MFI = *MF.getFrameInfo(); + MachineInstr *MI = &*It; + DebugLoc DL = MI->getDebugLoc(); + + unsigned SrcR = MI->getOperand(2).getReg(); + bool IsKill = MI->getOperand(2).isKill(); + + assert(MI->getOperand(0).isFI() && "Expect a frame index"); + int FI = MI->getOperand(0).getIndex(); + + bool Is128B = HST.useHVXDblOps(); + auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass + : &Hexagon::VectorRegs128BRegClass; + + unsigned NeedAlign = RC->getAlignment(); + unsigned HasAlign = MFI.getObjectAlignment(FI); + unsigned StoreOpc; + + if (NeedAlign <= HasAlign) + StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32b_ai_128B; + else + StoreOpc = !Is128B ? 
Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B; + + BuildMI(B, It, DL, HII.get(StoreOpc)) + .addFrameIndex(FI) + .addImm(0) + .addReg(SrcR, getKillRegState(IsKill)) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + B.erase(It); + return true; +} + +bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B, + MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, + const HexagonInstrInfo &HII, SmallVectorImpl &NewRegs) const { + MachineFunction &MF = *B.getParent(); + auto &HST = MF.getSubtarget(); + auto &MFI = *MF.getFrameInfo(); + MachineInstr *MI = &*It; + DebugLoc DL = MI->getDebugLoc(); + + unsigned DstR = MI->getOperand(0).getReg(); + + assert(MI->getOperand(1).isFI() && "Expect a frame index"); + int FI = MI->getOperand(1).getIndex(); + + bool Is128B = HST.useHVXDblOps(); + auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass + : &Hexagon::VectorRegs128BRegClass; + + unsigned NeedAlign = RC->getAlignment(); + unsigned HasAlign = MFI.getObjectAlignment(FI); + unsigned LoadOpc; + + if (NeedAlign <= HasAlign) + LoadOpc = !Is128B ? Hexagon::V6_vL32b_ai : Hexagon::V6_vL32b_ai_128B; + else + LoadOpc = !Is128B ? Hexagon::V6_vL32Ub_ai : Hexagon::V6_vL32Ub_ai_128B; + + BuildMI(B, It, DL, HII.get(LoadOpc), DstR) + .addFrameIndex(FI) + .addImm(0) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + B.erase(It); + return true; +} + + +bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF, + SmallVectorImpl &NewRegs) const { + auto &HST = MF.getSubtarget(); + auto &HII = *HST.getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool Changed = false; + + for (auto &B : MF) { + // Traverse the basic block. 
+ MachineBasicBlock::iterator NextI; + for (auto I = B.begin(), E = B.end(); I != E; I = NextI) { + MachineInstr *MI = &*I; + NextI = std::next(I); + unsigned Opc = MI->getOpcode(); + + switch (Opc) { + case TargetOpcode::COPY: + Changed |= expandCopy(B, I, MRI, HII, NewRegs); + break; + case Hexagon::STriw_pred: + case Hexagon::STriw_mod: + Changed |= expandStoreInt(B, I, MRI, HII, NewRegs); + break; + case Hexagon::LDriw_pred: + case Hexagon::LDriw_mod: + Changed |= expandLoadInt(B, I, MRI, HII, NewRegs); + break; + case Hexagon::STriq_pred_V6: + case Hexagon::STriq_pred_V6_128B: + Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs); + break; + case Hexagon::LDriq_pred_V6: + case Hexagon::LDriq_pred_V6_128B: + Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs); + break; + case Hexagon::LDrivv_pseudo_V6: + case Hexagon::LDrivv_pseudo_V6_128B: + Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs); + break; + case Hexagon::STrivv_pseudo_V6: + case Hexagon::STrivv_pseudo_V6_128B: + Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs); + break; + case Hexagon::STriv_pseudo_V6: + case Hexagon::STriv_pseudo_V6_128B: + Changed |= expandStoreVec(B, I, MRI, HII, NewRegs); + break; + case Hexagon::LDriv_pseudo_V6: + case Hexagon::LDriv_pseudo_V6_128B: + Changed |= expandLoadVec(B, I, MRI, HII, NewRegs); + break; + } + } + } + + return Changed; +} + + +void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + auto &HST = MF.getSubtarget(); + auto &HRI = *HST.getRegisterInfo(); + + SavedRegs.resize(HRI.getNumRegs()); + + // If we have a function containing __builtin_eh_return we want to spill and + // restore all callee saved registers. Pretend that they are used. + if (MF.getInfo()->hasEHReturn()) + for (const MCPhysReg *R = HRI.getCalleeSavedRegs(&MF); *R; ++R) + SavedRegs.set(*R); + + // Replace predicate register pseudo spill code. 
+ SmallVector NewRegs; + expandSpillMacros(MF, NewRegs); + if (OptimizeSpillSlots && !isOptNone(MF)) + optimizeSpillSlots(MF, NewRegs); + + // We need to reserve a a spill slot if scavenging could potentially require + // spilling a scavenged register. + if (!NewRegs.empty()) { + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + SetVector SpillRCs; + // Reserve an int register in any case, because it could be used to hold + // the stack offset in case it does not fit into a spill instruction. + SpillRCs.insert(&Hexagon::IntRegsRegClass); + + for (unsigned VR : NewRegs) + SpillRCs.insert(MRI.getRegClass(VR)); + + for (auto *RC : SpillRCs) { + if (!needToReserveScavengingSpillSlots(MF, HRI, RC)) + continue; + unsigned Num = RC == &Hexagon::IntRegsRegClass ? NumberScavengerSlots : 1; + unsigned S = RC->getSize(), A = RC->getAlignment(); + for (unsigned i = 0; i < Num; i++) { + int NewFI = MFI.CreateSpillStackObject(S, A); + RS->addScavengingFrameIndex(NewFI); + } + } + } + + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); +} + + +unsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF, + HexagonBlockRanges::IndexRange &FIR, + HexagonBlockRanges::InstrIndexMap &IndexMap, + HexagonBlockRanges::RegToRangeMap &DeadMap, + const TargetRegisterClass *RC) const { + auto &HRI = *MF.getSubtarget().getRegisterInfo(); + auto &MRI = MF.getRegInfo(); + + auto isDead = [&FIR,&DeadMap] (unsigned Reg) -> bool { + auto F = DeadMap.find({Reg,0}); + if (F == DeadMap.end()) + return false; + for (auto &DR : F->second) + if (DR.contains(FIR)) + return true; + return false; + }; + + for (unsigned Reg : RC->getRawAllocationOrder(MF)) { + bool Dead = true; + for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) { + if (isDead(R.Reg)) + continue; + Dead = false; + break; + } + if (Dead) + return Reg; + } + return 0; +} + +void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, + SmallVectorImpl &VRegs) 
const { + auto &HST = MF.getSubtarget(); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + auto &MRI = MF.getRegInfo(); + HexagonBlockRanges HBR(MF); + + typedef std::map + BlockIndexMap; + typedef std::map + BlockRangeMap; + typedef HexagonBlockRanges::IndexType IndexType; + + struct SlotInfo { + BlockRangeMap Map; + unsigned Size; + const TargetRegisterClass *RC; + + SlotInfo() : Map(), Size(0), RC(nullptr) {} + }; + + BlockIndexMap BlockIndexes; + SmallSet BadFIs; + std::map FIRangeMap; + + auto getRegClass = [&MRI,&HRI] (HexagonBlockRanges::RegisterRef R) + -> const TargetRegisterClass* { + if (TargetRegisterInfo::isPhysicalRegister(R.Reg)) + assert(R.Sub == 0); + if (TargetRegisterInfo::isVirtualRegister(R.Reg)) { + auto *RCR = MRI.getRegClass(R.Reg); + if (R.Sub == 0) + return RCR; + unsigned PR = *RCR->begin(); + R.Reg = HRI.getSubReg(PR, R.Sub); + } + return HRI.getMinimalPhysRegClass(R.Reg); + }; + // Accumulate register classes: get a common class for a pre-existing + // class HaveRC and a new class NewRC. Return nullptr if a common class + // cannot be found, otherwise return the resulting class. If HaveRC is + // nullptr, assume that it is still unset. + auto getCommonRC = [&HRI] (const TargetRegisterClass *HaveRC, + const TargetRegisterClass *NewRC) + -> const TargetRegisterClass* { + if (HaveRC == nullptr || HaveRC == NewRC) + return NewRC; + // Different classes, both non-null. Pick the more general one. + if (HaveRC->hasSubClassEq(NewRC)) + return HaveRC; + if (NewRC->hasSubClassEq(HaveRC)) + return NewRC; + return nullptr; + }; + + // Scan all blocks in the function. Check all occurrences of frame indexes, + // and collect relevant information. + for (auto &B : MF) { + std::map LastStore, LastLoad; + // Emplace appears not to be supported in gcc 4.7.2-4. 
+ //auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B)); + auto P = BlockIndexes.insert( + std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B))); + auto &IndexMap = P.first->second; + DEBUG(dbgs() << "Index map for BB#" << B.getNumber() << "\n" + << IndexMap << '\n'); + + for (auto &In : B) { + int LFI, SFI; + bool Load = HII.isLoadFromStackSlot(In, LFI) && !HII.isPredicated(In); + bool Store = HII.isStoreToStackSlot(In, SFI) && !HII.isPredicated(In); + if (Load && Store) { + // If it's both a load and a store, then we won't handle it. + BadFIs.insert(LFI); + BadFIs.insert(SFI); + continue; + } + // Check for register classes of the register used as the source for + // the store, and the register used as the destination for the load. + // Also, only accept base+imm_offset addressing modes. Other addressing + // modes can have side-effects (post-increments, etc.). For stack + // slots they are very unlikely, so there is not much loss due to + // this restriction. + if (Load || Store) { + int TFI = Load ? LFI : SFI; + unsigned AM = HII.getAddrMode(&In); + SlotInfo &SI = FIRangeMap[TFI]; + bool Bad = (AM != HexagonII::BaseImmOffset); + if (!Bad) { + // If the addressing mode is ok, check the register class. + const TargetRegisterClass *RC = nullptr; + if (Load) { + MachineOperand &DataOp = In.getOperand(0); + RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()}); + } else { + MachineOperand &DataOp = In.getOperand(2); + RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()}); + } + RC = getCommonRC(SI.RC, RC); + if (RC == nullptr) + Bad = true; + else + SI.RC = RC; + } + if (!Bad) { + // Check sizes. + unsigned S = (1U << (HII.getMemAccessSize(&In) - 1)); + if (SI.Size != 0 && SI.Size != S) + Bad = true; + else + SI.Size = S; + } + if (Bad) + BadFIs.insert(TFI); + } + + // Locate uses of frame indices. 
+ for (unsigned i = 0, n = In.getNumOperands(); i < n; ++i) { + const MachineOperand &Op = In.getOperand(i); + if (!Op.isFI()) + continue; + int FI = Op.getIndex(); + // Make sure that the following operand is an immediate and that + // it is 0. This is the offset in the stack object. + if (i+1 >= n || !In.getOperand(i+1).isImm() || + In.getOperand(i+1).getImm() != 0) + BadFIs.insert(FI); + if (BadFIs.count(FI)) + continue; + + IndexType Index = IndexMap.getIndex(&In); + if (Load) { + if (LastStore[FI] == IndexType::None) + LastStore[FI] = IndexType::Entry; + LastLoad[FI] = Index; + } else if (Store) { + HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B]; + if (LastStore[FI] != IndexType::None) + RL.add(LastStore[FI], LastLoad[FI], false, false); + else if (LastLoad[FI] != IndexType::None) + RL.add(IndexType::Entry, LastLoad[FI], false, false); + LastLoad[FI] = IndexType::None; + LastStore[FI] = Index; + } else { + BadFIs.insert(FI); + } + } + } + + for (auto &I : LastLoad) { + IndexType LL = I.second; + if (LL == IndexType::None) + continue; + auto &RL = FIRangeMap[I.first].Map[&B]; + IndexType &LS = LastStore[I.first]; + if (LS != IndexType::None) + RL.add(LS, LL, false, false); + else + RL.add(IndexType::Entry, LL, false, false); + LS = IndexType::None; + } + for (auto &I : LastStore) { + IndexType LS = I.second; + if (LS == IndexType::None) + continue; + auto &RL = FIRangeMap[I.first].Map[&B]; + RL.add(LS, IndexType::None, false, false); + } + } + + DEBUG({ + for (auto &P : FIRangeMap) { + dbgs() << "fi#" << P.first; + if (BadFIs.count(P.first)) + dbgs() << " (bad)"; + dbgs() << " RC: "; + if (P.second.RC != nullptr) + dbgs() << HRI.getRegClassName(P.second.RC) << '\n'; + else + dbgs() << "\n"; + for (auto &R : P.second.Map) + dbgs() << " BB#" << R.first->getNumber() << " { " << R.second << "}\n"; + } + }); + + // When a slot is loaded from in a block without being stored to in the + // same block, it is live-on-entry to this block. 
To avoid CFG analysis, + // consider this slot to be live-on-exit from all blocks. + SmallSet LoxFIs; + + std::map> BlockFIMap; + + for (auto &P : FIRangeMap) { + // P = pair(FI, map: BB->RangeList) + if (BadFIs.count(P.first)) + continue; + for (auto &B : MF) { + auto F = P.second.Map.find(&B); + // F = pair(BB, RangeList) + if (F == P.second.Map.end() || F->second.empty()) + continue; + HexagonBlockRanges::IndexRange &IR = F->second.front(); + if (IR.start() == IndexType::Entry) + LoxFIs.insert(P.first); + BlockFIMap[&B].push_back(P.first); + } + } + + DEBUG({ + dbgs() << "Block-to-FI map (* -- live-on-exit):\n"; + for (auto &P : BlockFIMap) { + auto &FIs = P.second; + if (FIs.empty()) + continue; + dbgs() << " BB#" << P.first->getNumber() << ": {"; + for (auto I : FIs) { + dbgs() << " fi#" << I; + if (LoxFIs.count(I)) + dbgs() << '*'; + } + dbgs() << " }\n"; + } + }); + + // eliminate loads, when all loads eliminated, eliminate all stores. + for (auto &B : MF) { + auto F = BlockIndexes.find(&B); + assert(F != BlockIndexes.end()); + HexagonBlockRanges::InstrIndexMap &IM = F->second; + HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM); + HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM); + DEBUG(dbgs() << "BB#" << B.getNumber() << " dead map\n" + << HexagonBlockRanges::PrintRangeMap(DM, HRI)); + + for (auto FI : BlockFIMap[&B]) { + if (BadFIs.count(FI)) + continue; + DEBUG(dbgs() << "Working on fi#" << FI << '\n'); + HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B]; + for (auto &Range : RL) { + DEBUG(dbgs() << "--Examining range:" << RL << '\n'); + if (!IndexType::isInstr(Range.start()) || + !IndexType::isInstr(Range.end())) + continue; + MachineInstr *SI = IM.getInstr(Range.start()); + MachineInstr *EI = IM.getInstr(Range.end()); + assert(SI->mayStore() && "Unexpected start instruction"); + assert(EI->mayLoad() && "Unexpected end instruction"); + MachineOperand &SrcOp = SI->getOperand(2); + + HexagonBlockRanges::RegisterRef 
SrcRR = { SrcOp.getReg(), + SrcOp.getSubReg() }; + auto *RC = getRegClass({SrcOp.getReg(), SrcOp.getSubReg()}); + // The this-> is needed to unconfuse MSVC. + unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC); + DEBUG(dbgs() << "Replacement reg:" << PrintReg(FoundR, &HRI) << '\n'); + if (FoundR == 0) + continue; + + // Generate the copy-in: "FoundR = COPY SrcR" at the store location. + MachineBasicBlock::iterator StartIt = SI, NextIt; + MachineInstr *CopyIn = nullptr; + if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) { + const DebugLoc &DL = SI->getDebugLoc(); + CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR) + .addOperand(SrcOp); + } + + ++StartIt; + // Check if this is a last store and the FI is live-on-exit. + if (LoxFIs.count(FI) && (&Range == &RL.back())) { + // Update store's source register. + if (unsigned SR = SrcOp.getSubReg()) + SrcOp.setReg(HRI.getSubReg(FoundR, SR)); + else + SrcOp.setReg(FoundR); + SrcOp.setSubReg(0); + // We are keeping this register live. + SrcOp.setIsKill(false); + } else { + B.erase(SI); + IM.replaceInstr(SI, CopyIn); + } + + auto EndIt = std::next(MachineBasicBlock::iterator(EI)); + for (auto It = StartIt; It != EndIt; It = NextIt) { + MachineInstr *MI = &*It; + NextIt = std::next(It); + int TFI; + if (!HII.isLoadFromStackSlot(*MI, TFI) || TFI != FI) + continue; + unsigned DstR = MI->getOperand(0).getReg(); + assert(MI->getOperand(0).getSubReg() == 0); + MachineInstr *CopyOut = nullptr; + if (DstR != FoundR) { + DebugLoc DL = MI->getDebugLoc(); + unsigned MemSize = (1U << (HII.getMemAccessSize(MI) - 1)); + assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset); + unsigned CopyOpc = TargetOpcode::COPY; + if (HII.isSignExtendingLoad(*MI)) + CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth; + else if (HII.isZeroExtendingLoad(*MI)) + CopyOpc = (MemSize == 1) ? 
Hexagon::A2_zxtb : Hexagon::A2_zxth; + CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR) + .addReg(FoundR, getKillRegState(MI == EI)); + } + IM.replaceInstr(MI, CopyOut); + B.erase(It); + } + + // Update the dead map. + HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 }; + for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI)) + DM[RR].subtract(Range); + } // for Range in range list + } + } +} + + void HexagonFrameLowering::expandAlloca(MachineInstr *AI, const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const { MachineBasicBlock &MB = *AI->getParent(); @@ -1407,15 +2257,13 @@ const MachineInstr *HexagonFrameLowering::getAlignaInstr( } -// FIXME: Use Function::optForSize(). -inline static bool isOptSize(const MachineFunction &MF) { - AttributeSet AF = MF.getFunction()->getAttributes(); - return AF.hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize); -} - -inline static bool isMinSize(const MachineFunction &MF) { - return MF.getFunction()->optForMinSize(); +/// Adds all callee-saved registers as implicit uses or defs to the +/// instruction. +void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, + const CSIVect &CSI, bool IsDef, bool IsKill) const { + // Add the callee-saved registers as implicit uses. + for (auto &R : CSI) + MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill)); } @@ -1472,7 +2320,18 @@ bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const { if (shouldInlineCSR(MF, CSI)) return false; + // The restore functions do a bit more than just restoring registers. + // The non-returning versions will go back directly to the caller's + // caller, others will clean up the stack frame in preparation for + // a tail call. Using them can still save code size even if only one + // register is getting restores. Make the decision based on -Oz: + // using -Os will use inline restore for a single register. 
+ if (isMinSize(MF)) + return true; unsigned NumCSI = CSI.size(); + if (NumCSI <= 1) + return false; + unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1 : SpillFuncThreshold; return Threshold < NumCSI; diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index 683b303d43ea..3e76214559b7 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -11,6 +11,7 @@ #define LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H #include "Hexagon.h" +#include "HexagonBlockRanges.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -41,22 +42,23 @@ public: return true; } - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; + MachineBasicBlock::iterator + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, - RegScavenger *RS = nullptr) const override; + RegScavenger *RS = nullptr) const override; void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, - RegScavenger *RS) const override; + RegScavenger *RS) const override; bool targetHandlesStackFrameRounding() const override { return true; } int getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const override; + unsigned &FrameReg) const override; bool hasFP(const MachineFunction &MF) const override; const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) - const override { + const override { static const SpillSlot Offsets[] = { { Hexagon::R17, -4 }, { Hexagon::R16, -8 }, { Hexagon::D8, -8 }, { Hexagon::R19, -12 }, { Hexagon::R18, -16 }, { Hexagon::D9, -16 }, @@ -83,22 +85,61 @@ private: void expandAlloca(MachineInstr *AI, const HexagonInstrInfo &TII, unsigned SP, unsigned CF) const; - void insertPrologueInBlock(MachineBasicBlock &MBB) const; + void 
insertPrologueInBlock(MachineBasicBlock &MBB, bool PrologueStubs) const; void insertEpilogueInBlock(MachineBasicBlock &MBB) const; bool insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, - const HexagonRegisterInfo &HRI) const; + const HexagonRegisterInfo &HRI, bool &PrologueStubs) const; bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, const HexagonRegisterInfo &HRI) const; + bool updateExitPaths(MachineBasicBlock &MBB, MachineBasicBlock *RestoreB, + BitVector &DoneT, BitVector &DoneF, BitVector &Path) const; void insertCFIInstructionsAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator At) const; void adjustForCalleeSavedRegsSpillCall(MachineFunction &MF) const; - bool replacePredRegPseudoSpillCode(MachineFunction &MF) const; - bool replaceVecPredRegPseudoSpillCode(MachineFunction &MF) const; + + bool expandCopy(MachineBasicBlock &B, MachineBasicBlock::iterator It, + MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, + SmallVectorImpl &NewRegs) const; + bool expandStoreInt(MachineBasicBlock &B, MachineBasicBlock::iterator It, + MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, + SmallVectorImpl &NewRegs) const; + bool expandLoadInt(MachineBasicBlock &B, MachineBasicBlock::iterator It, + MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, + SmallVectorImpl &NewRegs) const; + bool expandStoreVecPred(MachineBasicBlock &B, MachineBasicBlock::iterator It, + MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, + SmallVectorImpl &NewRegs) const; + bool expandLoadVecPred(MachineBasicBlock &B, MachineBasicBlock::iterator It, + MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, + SmallVectorImpl &NewRegs) const; + bool expandStoreVec2(MachineBasicBlock &B, MachineBasicBlock::iterator It, + MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, + SmallVectorImpl &NewRegs) const; + bool expandLoadVec2(MachineBasicBlock &B, MachineBasicBlock::iterator It, + MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, + 
SmallVectorImpl &NewRegs) const; + bool expandStoreVec(MachineBasicBlock &B, MachineBasicBlock::iterator It, + MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, + SmallVectorImpl &NewRegs) const; + bool expandLoadVec(MachineBasicBlock &B, MachineBasicBlock::iterator It, + MachineRegisterInfo &MRI, const HexagonInstrInfo &HII, + SmallVectorImpl &NewRegs) const; + bool expandSpillMacros(MachineFunction &MF, + SmallVectorImpl &NewRegs) const; + + unsigned findPhysReg(MachineFunction &MF, HexagonBlockRanges::IndexRange &FIR, + HexagonBlockRanges::InstrIndexMap &IndexMap, + HexagonBlockRanges::RegToRangeMap &DeadMap, + const TargetRegisterClass *RC) const; + void optimizeSpillSlots(MachineFunction &MF, + SmallVectorImpl &VRegs) const; void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const; + void addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, const CSIVect &CSI, + bool IsDef, bool IsKill) const; bool shouldInlineCSR(llvm::MachineFunction &MF, const CSIVect &CSI) const; bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const; bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const; diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp index f26e2ff764d7..f46b6d2a82e3 100644 --- a/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -242,6 +242,9 @@ bool HexagonGenExtract::visitBlock(BasicBlock *B) { bool HexagonGenExtract::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + DT = &getAnalysis().getDomTree(); bool Changed; diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp index 64a2b6cec18a..71d079193d79 100644 --- a/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -9,11 +9,8 @@ #define DEBUG_TYPE "hexinsert" -#include "llvm/Pass.h" -#include "llvm/PassRegistry.h" #include "llvm/ADT/BitVector.h" 
#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -21,10 +18,12 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -33,7 +32,6 @@ #include "HexagonTargetMachine.h" #include "HexagonBitTracker.h" -#include #include using namespace llvm; @@ -1446,7 +1444,7 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { bool AllDead = true; SmallVector Regs; - for (ConstMIOperands Op(MI); Op.isValid(); ++Op) { + for (ConstMIOperands Op(*MI); Op.isValid(); ++Op) { if (!Op->isReg() || !Op->isDef()) continue; unsigned R = Op->getReg(); @@ -1471,6 +1469,9 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; bool Changed = false; TimerGroup __G("hexinsert"); diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp index c059d566709e..bb9256db4b48 100644 --- a/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -49,6 +49,10 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } private: const HexagonInstrInfo *HII; @@ -70,10 +74,10 @@ namespace { 
MachineOperand *SrcT, *SrcF; MachineInstr *Def1, *Def2; MuxInfo(MachineBasicBlock::iterator It, unsigned DR, unsigned PR, - MachineOperand *TOp, MachineOperand *FOp, - MachineInstr *D1, MachineInstr *D2) - : At(It), DefR(DR), PredR(PR), SrcT(TOp), SrcF(FOp), Def1(D1), - Def2(D2) {} + MachineOperand *TOp, MachineOperand *FOp, MachineInstr &D1, + MachineInstr &D2) + : At(It), DefR(DR), PredR(PR), SrcT(TOp), SrcF(FOp), Def1(&D1), + Def2(&D2) {} }; typedef DenseMap InstrIndexMap; typedef DenseMap DefUseInfoMap; @@ -128,7 +132,7 @@ void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs, expandReg(*R++, Uses); // Look over all operands, and collect explicit defs and uses. - for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) { + for (ConstMIOperands Mo(*MI); Mo.isValid(); ++Mo) { if (!Mo->isReg() || Mo->isImplicit()) continue; unsigned R = Mo->getReg(); @@ -258,8 +262,8 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { MachineBasicBlock::iterator It1 = B.begin(), It2 = B.begin(); std::advance(It1, MinX); std::advance(It2, MaxX); - MachineInstr *Def1 = It1, *Def2 = It2; - MachineOperand *Src1 = &Def1->getOperand(2), *Src2 = &Def2->getOperand(2); + MachineInstr &Def1 = *It1, &Def2 = *It2; + MachineOperand *Src1 = &Def1.getOperand(2), *Src2 = &Def2.getOperand(2); unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0; unsigned SR2 = Src2->isReg() ? 
Src2->getReg() : 0; bool Failure = false, CanUp = true, CanDown = true; @@ -305,6 +309,8 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { } bool HexagonGenMux::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; HII = MF.getSubtarget().getInstrInfo(); HRI = MF.getSubtarget().getRegisterInfo(); bool Changed = false; @@ -316,4 +322,3 @@ bool HexagonGenMux::runOnMachineFunction(MachineFunction &MF) { FunctionPass *llvm::createHexagonGenMux() { return new HexagonGenMux(); } - diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp index d9675b5173d2..dcfd3e8317a9 100644 --- a/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -9,24 +9,22 @@ #define DEBUG_TYPE "gen-pred" +#include "HexagonTargetMachine.h" #include "llvm/ADT/SetVector.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "HexagonTargetMachine.h" +#include "llvm/Target/TargetMachine.h" #include #include #include -#include using namespace llvm; @@ -157,7 +155,7 @@ unsigned HexagonGenPredicate::getPredForm(unsigned Opc) { // The opcode corresponding to 0 is TargetOpcode::PHI. We can use 0 here // to denote "none", but we need to make sure that none of the valid opcodes // that we return will ever be 0. 
- assert(PHI == 0 && "Use different value for "); + static_assert(PHI == 0, "Use different value for "); return 0; } @@ -332,7 +330,7 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) { case Hexagon::C4_or_orn: case Hexagon::C2_xor: // Add operands to the queue. - for (ConstMIOperands Mo(DefI); Mo.isValid(); ++Mo) + for (ConstMIOperands Mo(*DefI); Mo.isValid(); ++Mo) if (Mo->isReg() && Mo->isUse()) WorkQ.push(Register(Mo->getReg())); break; @@ -449,13 +447,12 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { // the convertible instruction is converted, its predicate result will be // copied back into the original gpr. - for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) { - MachineBasicBlock &B = *A; - for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { - if (I->getOpcode() != TargetOpcode::COPY) + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (MI.getOpcode() != TargetOpcode::COPY) continue; - Register DR = I->getOperand(0); - Register SR = I->getOperand(1); + Register DR = MI.getOperand(0); + Register SR = MI.getOperand(1); if (!TargetRegisterInfo::isVirtualRegister(DR.R)) continue; if (!TargetRegisterInfo::isVirtualRegister(SR.R)) @@ -466,7 +463,7 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { continue; assert(!DR.S && !SR.S && "Unexpected subregister"); MRI->replaceRegWith(DR.R, SR.R); - Erase.insert(I); + Erase.insert(&MI); Changed = true; } } @@ -479,6 +476,9 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index d20a809d6c09..cc154c4be012 100644 --- 
a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -346,6 +346,8 @@ FunctionPass *llvm::createHexagonHardwareLoops() { bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); + if (skipFunction(*MF.getFunction())) + return false; bool Changed = false; @@ -434,7 +436,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, SmallVector Cond; MachineBasicBlock *TB = nullptr, *FB = nullptr; - bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); + bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); if (NotAnalyzed) return false; @@ -448,8 +450,8 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, unsigned CmpReg1 = 0, CmpReg2 = 0; int CmpImm = 0, CmpMask = 0; - bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2, - CmpMask, CmpImm); + bool CmpAnalyzed = + TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm); // Fail if the compare was not analyzed, or it's not comparing a register // with an immediate value. Not checking the mask here, since we handle // the individual compare opcodes (including A4_cmpb*) later on. 
@@ -581,7 +583,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, SmallVector Cond; MachineBasicBlock *TB = nullptr, *FB = nullptr; - bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); + bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); if (NotAnalyzed) return nullptr; @@ -593,7 +595,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { MachineBasicBlock *LTB = 0, *LFB = 0; SmallVector LCond; - bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false); + bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); if (NotAnalyzed) return nullptr; if (TB == Latch) @@ -618,8 +620,8 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, unsigned CmpReg1 = 0, CmpReg2 = 0; int Mask = 0, ImmValue = 0; - bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2, - Mask, ImmValue); + bool AnalyzedCmp = + TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue); if (!AnalyzedCmp) return nullptr; @@ -1184,7 +1186,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, MachineBasicBlock *TB = 0, *FB = 0; SmallVector Cond; - if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false)) + if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false)) return false; if (L->contains(TB)) @@ -1418,12 +1420,12 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow( unsigned CmpReg1 = 0, CmpReg2 = 0; int CmpMask = 0, CmpValue = 0; - if (!TII->analyzeCompare(MI, CmpReg1, CmpReg2, CmpMask, CmpValue)) + if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue)) continue; MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector Cond; - if (TII->AnalyzeBranch(*MI->getParent(), TBB, FBB, Cond, false)) + if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false)) continue; Comparison::Kind Cmp = getComparisonKind(MI->getOpcode(), 0, 0, 0); @@ -1619,14 +1621,14 @@ bool 
HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { MachineBasicBlock *TB = nullptr, *FB = nullptr; SmallVector Cond; // AnalyzeBranch returns true if it fails to analyze branch. - bool NotAnalyzed = TII->AnalyzeBranch(*ExitingBlock, TB, FB, Cond, false); + bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); if (NotAnalyzed || Cond.empty()) return false; if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { MachineBasicBlock *LTB = 0, *LFB = 0; SmallVector LCond; - bool NotAnalyzed = TII->AnalyzeBranch(*Latch, LTB, LFB, LCond, false); + bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); if (NotAnalyzed) return false; @@ -1837,12 +1839,12 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( SmallVector Tmp1; MachineBasicBlock *TB = nullptr, *FB = nullptr; - if (TII->AnalyzeBranch(*ExitingBlock, TB, FB, Tmp1, false)) + if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false)) return nullptr; for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { MachineBasicBlock *PB = *I; - bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false); + bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false); if (NotAnalyzed) return nullptr; } @@ -1928,7 +1930,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( MachineBasicBlock *PB = *I; if (PB != Latch) { Tmp2.clear(); - bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false); + bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false); (void)NotAnalyzed; // suppress compiler warning assert (!NotAnalyzed && "Should be analyzable!"); if (TB != Header && (Tmp2.empty() || FB != Header)) @@ -1940,7 +1942,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( // It can happen that the latch block will fall through into the header. // Insert an unconditional branch to the header. 
TB = FB = nullptr; - bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false); + bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false); (void)LatchNotAnalyzed; // suppress compiler warning assert (!LatchNotAnalyzed && "Should be analyzable!"); if (!TB && !FB) diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index a0da945e7572..22247aa39b61 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -15,13 +15,11 @@ #include "HexagonISelLowering.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonTargetMachine.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -38,17 +36,13 @@ MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders", // Instruction Selector Implementation //===----------------------------------------------------------------------===// -namespace llvm { - void initializeHexagonDAGToDAGISelPass(PassRegistry&); -} - //===--------------------------------------------------------------------===// /// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine /// instructions for SelectionDAG operations. 
/// namespace { class HexagonDAGToDAGISel : public SelectionDAGISel { - const HexagonTargetMachine& HTM; + const HexagonTargetMachine &HTM; const HexagonSubtarget *HST; const HexagonInstrInfo *HII; const HexagonRegisterInfo *HRI; @@ -56,9 +50,7 @@ public: explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr), - HRI(nullptr) { - initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry()); - } + HRI(nullptr) {} bool runOnMachineFunction(MachineFunction &MF) override { // Reset the subtarget each time through. @@ -72,7 +64,7 @@ public: virtual void PreprocessISelDAG() override; virtual void EmitFunctionEntryCode() override; - SDNode *Select(SDNode *N) override; + void Select(SDNode *N) override; // Complex Pattern Selectors. inline bool SelectAddrGA(SDValue &N, SDValue &R); @@ -84,36 +76,41 @@ public: return "Hexagon DAG->DAG Pattern Instruction Selection"; } - SDNode *SelectFrameIndex(SDNode *N); + // Generate a machine instruction node corresponding to the circ/brev + // load intrinsic. + MachineSDNode *LoadInstrForLoadIntrinsic(SDNode *IntN); + // Given the circ/brev load intrinsic and the already generated machine + // instruction, generate the appropriate store (that is a part of the + // intrinsic's functionality). + SDNode *StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, SDNode *IntN); + + void SelectFrameIndex(SDNode *N); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. 
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; - SDNode *SelectLoad(SDNode *N); - SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl); - SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl); - SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode, - SDLoc dl); - SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode, - SDLoc dl); - SDNode *SelectBaseOffsetStore(StoreSDNode *ST, SDLoc dl); - SDNode *SelectIndexedStore(StoreSDNode *ST, SDLoc dl); - SDNode *SelectStore(SDNode *N); - SDNode *SelectSHL(SDNode *N); - SDNode *SelectMul(SDNode *N); - SDNode *SelectZeroExtend(SDNode *N); - SDNode *SelectIntrinsicWChain(SDNode *N); - SDNode *SelectIntrinsicWOChain(SDNode *N); - SDNode *SelectConstant(SDNode *N); - SDNode *SelectConstantFP(SDNode *N); - SDNode *SelectAdd(SDNode *N); - SDNode *SelectBitOp(SDNode *N); + bool tryLoadOfLoadIntrinsic(LoadSDNode *N); + void SelectLoad(SDNode *N); + void SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl); + void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl); + void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl); + void SelectStore(SDNode *N); + void SelectSHL(SDNode *N); + void SelectMul(SDNode *N); + void SelectZeroExtend(SDNode *N); + void SelectIntrinsicWChain(SDNode *N); + void SelectIntrinsicWOChain(SDNode *N); + void SelectConstant(SDNode *N); + void SelectConstantFP(SDNode *N); + void SelectAdd(SDNode *N); + void SelectBitcast(SDNode *N); + void SelectBitOp(SDNode *N); // XformMskToBitPosU5Imm - Returns the bit position which // the single bit 32 bit mask represents. // Used in Clr and Set bit immediate memops. 
- SDValue XformMskToBitPosU5Imm(uint32_t Imm, SDLoc DL) { + SDValue XformMskToBitPosU5Imm(uint32_t Imm, const SDLoc &DL) { int32_t bitPos; bitPos = Log2_32(Imm); assert(bitPos >= 0 && bitPos < 32 && @@ -123,13 +120,13 @@ public: // XformMskToBitPosU4Imm - Returns the bit position which the single-bit // 16 bit mask represents. Used in Clr and Set bit immediate memops. - SDValue XformMskToBitPosU4Imm(uint16_t Imm, SDLoc DL) { + SDValue XformMskToBitPosU4Imm(uint16_t Imm, const SDLoc &DL) { return XformMskToBitPosU5Imm(Imm, DL); } // XformMskToBitPosU3Imm - Returns the bit position which the single-bit // 8 bit mask represents. Used in Clr and Set bit immediate memops. - SDValue XformMskToBitPosU3Imm(uint8_t Imm, SDLoc DL) { + SDValue XformMskToBitPosU3Imm(uint8_t Imm, const SDLoc &DL) { return XformMskToBitPosU5Imm(Imm, DL); } @@ -142,36 +139,36 @@ public: // XformM5ToU5Imm - Return a target constant with the specified value, of // type i32 where the negative literal is transformed into a positive literal // for use in -= memops. - inline SDValue XformM5ToU5Imm(signed Imm, SDLoc DL) { - assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops"); - return CurDAG->getTargetConstant(-Imm, DL, MVT::i32); + inline SDValue XformM5ToU5Imm(signed Imm, const SDLoc &DL) { + assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops"); + return CurDAG->getTargetConstant(-Imm, DL, MVT::i32); } // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range // [1..128], used in cmpb.gtu instructions. - inline SDValue XformU7ToU7M1Imm(signed Imm, SDLoc DL) { + inline SDValue XformU7ToU7M1Imm(signed Imm, const SDLoc &DL) { assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op"); return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i8); } // XformS8ToS8M1Imm - Return a target constant decremented by 1. 
- inline SDValue XformSToSM1Imm(signed Imm, SDLoc DL) { + inline SDValue XformSToSM1Imm(signed Imm, const SDLoc &DL) { return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32); } // XformU8ToU8M1Imm - Return a target constant decremented by 1. - inline SDValue XformUToUM1Imm(unsigned Imm, SDLoc DL) { + inline SDValue XformUToUM1Imm(unsigned Imm, const SDLoc &DL) { assert((Imm >= 1) && "Cannot decrement unsigned int less than 1"); return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32); } // XformSToSM2Imm - Return a target constant decremented by 2. - inline SDValue XformSToSM2Imm(unsigned Imm, SDLoc DL) { + inline SDValue XformSToSM2Imm(unsigned Imm, const SDLoc &DL) { return CurDAG->getTargetConstant(Imm - 2, DL, MVT::i32); } // XformSToSM3Imm - Return a target constant decremented by 3. - inline SDValue XformSToSM3Imm(unsigned Imm, SDLoc DL) { + inline SDValue XformSToSM3Imm(unsigned Imm, const SDLoc &DL) { return CurDAG->getTargetConstant(Imm - 3, DL, MVT::i32); } @@ -180,6 +177,8 @@ public: private: bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src); + bool orIsAdd(const SDNode *N) const; + bool isAlignedMemNode(const MemSDNode *N) const; }; // end HexagonDAGToDAGISel } // end anonymous namespace @@ -194,18 +193,6 @@ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, } } -static void initializePassOnce(PassRegistry &Registry) { - const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection"; - PassInfo *PI = new PassInfo(Name, "hexagon-isel", - &SelectionDAGISel::ID, nullptr, false, false); - Registry.registerPass(*PI, true); -} - -void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializePassOnce) -} - - // Intrinsics that return a a predicate. 
static bool doesIntrinsicReturnPredicate(unsigned ID) { switch (ID) { @@ -251,127 +238,11 @@ static bool doesIntrinsicReturnPredicate(unsigned ID) { } } -SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, - unsigned Opcode, - SDLoc dl) { - SDValue Chain = LD->getChain(); - EVT LoadedVT = LD->getMemoryVT(); - SDValue Base = LD->getBasePtr(); - SDValue Offset = LD->getOffset(); - SDNode *OffsetNode = Offset.getNode(); - int32_t Val = cast(OffsetNode)->getSExtValue(); - - if (HII->isValidAutoIncImm(LoadedVT, Val)) { - SDValue TargetConst = CurDAG->getTargetConstant(Val, dl, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, - MVT::Other, Base, TargetConst, - Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, - SDValue(Result_1, 0)); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast(Result_1)->setMemRefs(MemOp, MemOp + 1); - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) }; - const SDValue Tos[] = { SDValue(Result_2, 0), - SDValue(Result_1, 1), - SDValue(Result_1, 2) }; - ReplaceUses(Froms, Tos, 3); - return Result_2; - } - - SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, - Base, TargetConst0, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, - SDValue(Result_1, 0)); - SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, - Base, TargetConstVal, - SDValue(Result_1, 1)); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast(Result_1)->setMemRefs(MemOp, MemOp + 1); - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) }; - const SDValue Tos[] = { SDValue(Result_2, 0), - SDValue(Result_3, 0), - 
SDValue(Result_1, 1) }; - ReplaceUses(Froms, Tos, 3); - return Result_2; -} - - -SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD, - unsigned Opcode, - SDLoc dl) { - SDValue Chain = LD->getChain(); - EVT LoadedVT = LD->getMemoryVT(); - SDValue Base = LD->getBasePtr(); - SDValue Offset = LD->getOffset(); - SDNode *OffsetNode = Offset.getNode(); - int32_t Val = cast(OffsetNode)->getSExtValue(); - - if (HII->isValidAutoIncImm(LoadedVT, Val)) { - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::i32, MVT::Other, Base, - TargetConstVal, Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl, - MVT::i64, MVT::Other, - TargetConst0, - SDValue(Result_1,0)); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast(Result_1)->setMemRefs(MemOp, MemOp + 1); - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) }; - const SDValue Tos[] = { SDValue(Result_2, 0), - SDValue(Result_1, 1), - SDValue(Result_1, 2) }; - ReplaceUses(Froms, Tos, 3); - return Result_2; - } - - // Generate an indirect load. - SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::Other, Base, TargetConst0, - Chain); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl, - MVT::i64, MVT::Other, - TargetConst0, - SDValue(Result_1,0)); - // Add offset to base. 
- SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, - Base, TargetConstVal, - SDValue(Result_1, 1)); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast(Result_1)->setMemRefs(MemOp, MemOp + 1); - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) }; - const SDValue Tos[] = { SDValue(Result_2, 0), // Load value. - SDValue(Result_3, 0), // New address. - SDValue(Result_1, 1) }; - ReplaceUses(Froms, Tos, 3); - return Result_2; -} - - -SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { +void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Offset = LD->getOffset(); - SDNode *OffsetNode = Offset.getNode(); - // Get the constant value. - int32_t Val = cast(OffsetNode)->getSExtValue(); + int32_t Inc = cast(Offset.getNode())->getSExtValue(); EVT LoadedVT = LD->getMemoryVT(); unsigned Opcode = 0; @@ -379,232 +250,394 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) { // loads. ISD::LoadExtType ExtType = LD->getExtensionType(); bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD); - bool HasVecOffset = false; + bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc); - // Figure out the opcode. - if (LoadedVT == MVT::i64) { - if (HII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = Hexagon::L2_loadrd_pi; + assert(LoadedVT.isSimple()); + switch (LoadedVT.getSimpleVT().SimpleTy) { + case MVT::i8: + if (IsZeroExt) + Opcode = IsValidInc ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrub_io; else - Opcode = Hexagon::L2_loadrd_io; - } else if (LoadedVT == MVT::i32) { - if (HII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = Hexagon::L2_loadri_pi; - else - Opcode = Hexagon::L2_loadri_io; - } else if (LoadedVT == MVT::i16) { - if (HII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = IsZeroExt ? 
Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi; - else - Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io; - } else if (LoadedVT == MVT::i8) { - if (HII->isValidAutoIncImm(LoadedVT, Val)) - Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi; + Opcode = IsValidInc ? Hexagon::L2_loadrb_pi : Hexagon::L2_loadrb_io; + break; + case MVT::i16: + if (IsZeroExt) + Opcode = IsValidInc ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadruh_io; else - Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io; - } else if (LoadedVT == MVT::v16i32 || LoadedVT == MVT::v8i64 || - LoadedVT == MVT::v32i16 || LoadedVT == MVT::v64i8) { - HasVecOffset = true; - if (HII->isValidAutoIncImm(LoadedVT, Val)) { - Opcode = Hexagon::V6_vL32b_pi; - } + Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io; + break; + case MVT::i32: + Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io; + break; + case MVT::i64: + Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io; + break; + // 64B + case MVT::v64i8: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v8i64: + if (isAlignedMemNode(LD)) + Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai; else - Opcode = Hexagon::V6_vL32b_ai; + Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai; + break; // 128B - } else if (LoadedVT == MVT::v32i32 || LoadedVT == MVT::v16i64 || - LoadedVT == MVT::v64i16 || LoadedVT == MVT::v128i8) { - HasVecOffset = true; - if (HII->isValidAutoIncImm(LoadedVT, Val)) { - Opcode = Hexagon::V6_vL32b_pi_128B; - } + case MVT::v128i8: + case MVT::v64i16: + case MVT::v32i32: + case MVT::v16i64: + if (isAlignedMemNode(LD)) + Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B + : Hexagon::V6_vL32b_ai_128B; else - Opcode = Hexagon::V6_vL32b_ai_128B; - } else - llvm_unreachable("unknown memory type"); - - // For zero extended i64 loads, we need to add combine instructions. 
- if (LD->getValueType(0) == MVT::i64 && IsZeroExt) - return SelectIndexedLoadZeroExtend64(LD, Opcode, dl); - // Handle sign extended i64 loads. - if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD) - return SelectIndexedLoadSignExtend64(LD, Opcode, dl); - - if (HII->isValidAutoIncImm(LoadedVT, Val)) { - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); - SDNode* Result = CurDAG->getMachineNode(Opcode, dl, - LD->getValueType(0), - MVT::i32, MVT::Other, Base, - TargetConstVal, Chain); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast(Result)->setMemRefs(MemOp, MemOp + 1); - if (HasVecOffset) { - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result, 0), - SDValue(Result, 2) - }; - ReplaceUses(Froms, Tos, 2); - } else { - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result, 0), - SDValue(Result, 1), - SDValue(Result, 2) - }; - ReplaceUses(Froms, Tos, 3); + Opcode = IsValidInc ? 
Hexagon::V6_vL32Ub_pi_128B + : Hexagon::V6_vL32Ub_ai_128B; + break; + default: + llvm_unreachable("Unexpected memory type in indexed load"); + } + + SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + + auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl) + -> MachineSDNode* { + if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD) { + SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32); + return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64, + Zero, SDValue(N, 0)); } - return Result; + if (ExtType == ISD::SEXTLOAD) + return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64, + SDValue(N, 0)); + return N; + }; + + // Loaded value Next address Chain + SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) }; + SDValue To[3]; + + EVT ValueVT = LD->getValueType(0); + if (ValueVT == MVT::i64 && ExtType != ISD::NON_EXTLOAD) { + // A load extending to i64 will actually produce i32, which will then + // need to be extended to i64. + assert(LoadedVT.getSizeInBits() <= 32); + ValueVT = MVT::i32; + } + + if (IsValidInc) { + MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, + MVT::i32, MVT::Other, Base, + IncV, Chain); + L->setMemRefs(MemOp, MemOp+1); + To[1] = SDValue(L, 1); // Next address. + To[2] = SDValue(L, 2); // Chain. + // Handle special case for extension to i64. + if (LD->getValueType(0) == MVT::i64) + L = getExt64(L, dl); + To[0] = SDValue(L, 0); // Loaded (extended) value. 
} else { - SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); - SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, - LD->getValueType(0), - MVT::Other, Base, TargetConst0, - Chain); - SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, - Base, TargetConstVal, - SDValue(Result_1, 1)); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = LD->getMemOperand(); - cast(Result_1)->setMemRefs(MemOp, MemOp + 1); - const SDValue Froms[] = { SDValue(LD, 0), - SDValue(LD, 1), - SDValue(LD, 2) - }; - const SDValue Tos[] = { SDValue(Result_1, 0), - SDValue(Result_2, 0), - SDValue(Result_1, 1) - }; - ReplaceUses(Froms, Tos, 3); - return Result_1; + SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32); + MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other, + Base, Zero, Chain); + L->setMemRefs(MemOp, MemOp+1); + To[2] = SDValue(L, 1); // Chain. + MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, IncV); + To[1] = SDValue(A, 0); // Next address. + // Handle special case for extension to i64. + if (LD->getValueType(0) == MVT::i64) + L = getExt64(L, dl); + To[0] = SDValue(L, 0); // Loaded (extended) value. 
} + ReplaceUses(From, To, 3); + CurDAG->RemoveDeadNode(LD); } -SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) { - SDNode *result; +MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) { + if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN) + return nullptr; + + SDLoc dl(IntN); + unsigned IntNo = cast(IntN->getOperand(1))->getZExtValue(); + + static std::map LoadPciMap = { + { Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci }, + { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci }, + { Intrinsic::hexagon_circ_ldh, Hexagon::L2_loadrh_pci }, + { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci }, + { Intrinsic::hexagon_circ_ldw, Hexagon::L2_loadri_pci }, + { Intrinsic::hexagon_circ_ldd, Hexagon::L2_loadrd_pci }, + }; + auto FLC = LoadPciMap.find(IntNo); + if (FLC != LoadPciMap.end()) { + SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32, + IntN->getOperand(4)); + EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32; + EVT RTys[] = { ValTy, MVT::i32, MVT::Other }; + // Operands: { Base, Increment, Modifier, Chain } + auto Inc = cast(IntN->getOperand(5)); + SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32); + MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys, + { IntN->getOperand(2), I, SDValue(Mod,0), IntN->getOperand(0) }); + return Res; + } + + static std::map LoadPbrMap = { + { Intrinsic::hexagon_brev_ldb, Hexagon::L2_loadrb_pbr }, + { Intrinsic::hexagon_brev_ldub, Hexagon::L2_loadrub_pbr }, + { Intrinsic::hexagon_brev_ldh, Hexagon::L2_loadrh_pbr }, + { Intrinsic::hexagon_brev_lduh, Hexagon::L2_loadruh_pbr }, + { Intrinsic::hexagon_brev_ldw, Hexagon::L2_loadri_pbr }, + { Intrinsic::hexagon_brev_ldd, Hexagon::L2_loadrd_pbr }, + }; + auto FLB = LoadPbrMap.find(IntNo); + if (FLB != LoadPbrMap.end()) { + SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32, + IntN->getOperand(4)); + EVT ValTy = (IntNo == Intrinsic::hexagon_brev_ldd) ? 
MVT::i64 : MVT::i32; + EVT RTys[] = { ValTy, MVT::i32, MVT::Other }; + // Operands: { Base, Modifier, Chain } + MachineSDNode *Res = CurDAG->getMachineNode(FLB->second, dl, RTys, + { IntN->getOperand(2), SDValue(Mod,0), IntN->getOperand(0) }); + return Res; + } + + return nullptr; +} + +SDNode *HexagonDAGToDAGISel::StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, + SDNode *IntN) { + // The "LoadN" is just a machine load instruction. The intrinsic also + // involves storing it. Generate an appropriate store to the location + // given in the intrinsic's operand(3). + uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags; + unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) & + HexagonII::MemAccesSizeMask; + unsigned Size = 1U << (SizeBits-1); + + SDLoc dl(IntN); + MachinePointerInfo PI; + SDValue TS; + SDValue Loc = IntN->getOperand(3); + + if (Size >= 4) + TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI, + Size); + else + TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, + PI, MVT::getIntegerVT(Size * 8), Size); + + SDNode *StoreN; + { + HandleSDNode Handle(TS); + SelectStore(TS.getNode()); + StoreN = Handle.getValue().getNode(); + } + + // Load's results are { Loaded value, Updated pointer, Chain } + ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1)); + ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0)); + return StoreN; +} + +bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) { + // The intrinsics for load circ/brev perform two operations: + // 1. Load a value V from the specified location, using the addressing + // mode corresponding to the intrinsic. + // 2. Store V into a specified location. This location is typically a + // local, temporary object. + // In many cases, the program using these intrinsics will immediately + // load V again from the local object. In those cases, when certain + // conditions are met, the last load can be removed. 
+ // This function identifies and optimizes this pattern. If the pattern + // cannot be optimized, it returns false, which will cause the load + // to be selected separately from the intrinsic (which will be handled + // in SelectIntrinsicWChain). + + SDValue Ch = N->getOperand(0); + SDValue Loc = N->getOperand(1); + + // Assume that the load and the intrinsic are connected directly with a + // chain: + // t1: i32,ch = int.load ..., ..., ..., Loc, ... // <-- C + // t2: i32,ch = load t1:1, Loc, ... + SDNode *C = Ch.getNode(); + + if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN) + return false; + + // The second load can only be eliminated if its extension type matches + // that of the load instruction corresponding to the intrinsic. The user + // can provide an address of an unsigned variable to store the result of + // a sign-extending intrinsic into (or the other way around). + ISD::LoadExtType IntExt; + switch (cast(C->getOperand(1))->getZExtValue()) { + case Intrinsic::hexagon_brev_ldub: + case Intrinsic::hexagon_brev_lduh: + case Intrinsic::hexagon_circ_ldub: + case Intrinsic::hexagon_circ_lduh: + IntExt = ISD::ZEXTLOAD; + break; + case Intrinsic::hexagon_brev_ldw: + case Intrinsic::hexagon_brev_ldd: + case Intrinsic::hexagon_circ_ldw: + case Intrinsic::hexagon_circ_ldd: + IntExt = ISD::NON_EXTLOAD; + break; + default: + IntExt = ISD::SEXTLOAD; + break; + } + if (N->getExtensionType() != IntExt) + return false; + + // Make sure the target location for the loaded value in the load intrinsic + // is the location from which LD (or N) is loading.
+ if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode()) + return false; + + if (MachineSDNode *L = LoadInstrForLoadIntrinsic(C)) { + SDNode *S = StoreInstrForLoadIntrinsic(L, C); + SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) }; + SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) }; + ReplaceUses(F, T, array_lengthof(T)); + // This transformation will leave the intrinsic dead. If it remains in + // the DAG, the selection code will see it again, but without the load, + // and it will generate a store that is normally required for it. + CurDAG->RemoveDeadNode(C); + return true; + } + + return false; +} + +void HexagonDAGToDAGISel::SelectLoad(SDNode *N) { SDLoc dl(N); LoadSDNode *LD = cast(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); // Handle indexed loads. if (AM != ISD::UNINDEXED) { - result = SelectIndexedLoad(LD, dl); - } else { - result = SelectCode(LD); + SelectIndexedLoad(LD, dl); + return; } - return result; -} + // Handle patterns using circ/brev load intrinsics. + if (tryLoadOfLoadIntrinsic(LD)) + return; + SelectCode(LD); +} -SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) { +void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) { SDValue Chain = ST->getChain(); SDValue Base = ST->getBasePtr(); SDValue Offset = ST->getOffset(); SDValue Value = ST->getValue(); - SDNode *OffsetNode = Offset.getNode(); // Get the constant value. - int32_t Val = cast(OffsetNode)->getSExtValue(); + int32_t Inc = cast(Offset.getNode())->getSExtValue(); EVT StoredVT = ST->getMemoryVT(); EVT ValueVT = Value.getValueType(); - // Offset value must be within representable range - // and must have correct alignment properties. - if (HII->isValidAutoIncImm(StoredVT, Val)) { - unsigned Opcode = 0; - - // Figure out the post inc version of opcode. 
- if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_pi; - else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_pi; - else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_pi; - else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi; - else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 || - StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) { - Opcode = Hexagon::V6_vS32b_pi; - } - // 128B - else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 || - StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) { - Opcode = Hexagon::V6_vS32b_pi_128B; - } else llvm_unreachable("unknown memory type"); - - if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) { - assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store"); - Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, - dl, MVT::i32, Value); - } - SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, dl, MVT::i32), Value, - Chain}; - // Build post increment store. - SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::Other, Ops); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = ST->getMemOperand(); - cast(Result)->setMemRefs(MemOp, MemOp + 1); - - ReplaceUses(ST, Result); - ReplaceUses(SDValue(ST,1), SDValue(Result,1)); - return Result; - } - - // Note: Order of operands matches the def of instruction: - // def S2_storerd_io - // : STInst<(outs), (ins IntRegs:$base, imm:$offset, DoubleRegs:$src1), ... - // and it differs for POST_ST* for instance. - SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, dl, MVT::i32), Value, - Chain}; + bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc); unsigned Opcode = 0; - // Figure out the opcode. 
- if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io; - else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io; - else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io; - else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io; - else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 || - StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) - Opcode = Hexagon::V6_vS32b_ai; + assert(StoredVT.isSimple()); + switch (StoredVT.getSimpleVT().SimpleTy) { + case MVT::i8: + Opcode = IsValidInc ? Hexagon::S2_storerb_pi : Hexagon::S2_storerb_io; + break; + case MVT::i16: + Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io; + break; + case MVT::i32: + Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io; + break; + case MVT::i64: + Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io; + break; + // 64B + case MVT::v64i8: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v8i64: + if (isAlignedMemNode(ST)) + Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai; + else + Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai; + break; // 128B - else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 || - StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) - Opcode = Hexagon::V6_vS32b_ai_128B; - else llvm_unreachable("unknown memory type"); - - // Build regular store. - SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32); - SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); - // Build splitted incriment instruction. - SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, - Base, - TargetConstVal, - SDValue(Result_1, 0)); + case MVT::v128i8: + case MVT::v64i16: + case MVT::v32i32: + case MVT::v16i64: + if (isAlignedMemNode(ST)) + Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B + : Hexagon::V6_vS32b_ai_128B; + else + Opcode = IsValidInc ? 
Hexagon::V6_vS32Ub_pi_128B + : Hexagon::V6_vS32Ub_ai_128B; + break; + default: + llvm_unreachable("Unexpected memory type in indexed store"); + } + + if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) { + assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store"); + Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, + dl, MVT::i32, Value); + } + + SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = ST->getMemOperand(); - cast(Result_1)->setMemRefs(MemOp, MemOp + 1); - ReplaceUses(SDValue(ST,0), SDValue(Result_2,0)); - ReplaceUses(SDValue(ST,1), SDValue(Result_1,0)); - return Result_2; + // Next address Chain + SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) }; + SDValue To[2]; + + if (IsValidInc) { + // Build post increment store. + SDValue Ops[] = { Base, IncV, Value, Chain }; + MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other, + Ops); + S->setMemRefs(MemOp, MemOp + 1); + To[0] = SDValue(S, 0); + To[1] = SDValue(S, 1); + } else { + SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDValue Ops[] = { Base, Zero, Value, Chain }; + MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); + S->setMemRefs(MemOp, MemOp + 1); + To[1] = SDValue(S, 0); + MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32, + Base, IncV); + To[0] = SDValue(A, 0); + } + + ReplaceUses(From, To, 2); + CurDAG->RemoveDeadNode(ST); } -SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) { +void HexagonDAGToDAGISel::SelectStore(SDNode *N) { SDLoc dl(N); StoreSDNode *ST = cast(N); ISD::MemIndexedMode AM = ST->getAddressingMode(); // Handle indexed stores. 
if (AM != ISD::UNINDEXED) { - return SelectIndexedStore(ST, dl); + SelectIndexedStore(ST, dl); + return; } - return SelectCode(ST); + SelectCode(ST); } -SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { +void HexagonDAGToDAGISel::SelectMul(SDNode *N) { SDLoc dl(N); // @@ -629,7 +662,8 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { SDValue Sext0 = MulOp0.getOperand(0); if (Sext0.getNode()->getValueType(0) != MVT::i32) { - return SelectCode(N); + SelectCode(N); + return; } OP0 = Sext0; @@ -638,7 +672,8 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { if (LD->getMemoryVT() != MVT::i32 || LD->getExtensionType() != ISD::SEXTLOAD || LD->getAddressingMode() != ISD::UNINDEXED) { - return SelectCode(N); + SelectCode(N); + return; } SDValue Chain = LD->getChain(); @@ -648,14 +683,16 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { LD->getBasePtr(), TargetConst0, Chain), 0); } else { - return SelectCode(N); + SelectCode(N); + return; } // Same goes for the second operand. if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) { SDValue Sext1 = MulOp1.getOperand(0); if (Sext1.getNode()->getValueType(0) != MVT::i32) { - return SelectCode(N); + SelectCode(N); + return; } OP1 = Sext1; @@ -664,7 +701,8 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { if (LD->getMemoryVT() != MVT::i32 || LD->getExtensionType() != ISD::SEXTLOAD || LD->getAddressingMode() != ISD::UNINDEXED) { - return SelectCode(N); + SelectCode(N); + return; } SDValue Chain = LD->getChain(); @@ -674,20 +712,21 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { LD->getBasePtr(), TargetConst0, Chain), 0); } else { - return SelectCode(N); + SelectCode(N); + return; } // Generate a mpy instruction. 
SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl, MVT::i64, OP0, OP1); - ReplaceUses(N, Result); - return Result; + ReplaceNode(N, Result); + return; } - return SelectCode(N); + SelectCode(N); } -SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { +void HexagonDAGToDAGISel::SelectSHL(SDNode *N) { SDLoc dl(N); if (N->getValueType(0) == MVT::i32) { SDValue Shl_0 = N->getOperand(0); @@ -711,8 +750,8 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { SDNode* Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, MVT::i32, Mul_0, Val); - ReplaceUses(N, Result); - return Result; + ReplaceNode(N, Result); + return; } } @@ -740,8 +779,8 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { SDNode* Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, MVT::i32, Shl2_0, Val); - ReplaceUses(N, Result); - return Result; + ReplaceNode(N, Result); + return; } } } @@ -750,7 +789,7 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { } } } - return SelectCode(N); + SelectCode(N); } @@ -764,7 +803,7 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) { // compiler. Architecture defines them as 8-bit registers. // We want to preserve all the lower 8-bits and, not just 1 LSB bit. 
// -SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { +void HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { SDLoc dl(N); SDValue Op0 = N->getOperand(0); @@ -790,11 +829,14 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { SDValue(Mask,0), SDValue(OnesReg,0)); SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl, MVT::i32); - return CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT, - SDValue(And,0), SubR); + ReplaceNode(N, CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT, + SDValue(And, 0), SubR)); + return; } - return CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT, - SDValue(Mask,0), SDValue(OnesReg,0)); + ReplaceNode(N, + CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT, + SDValue(Mask, 0), SDValue(OnesReg, 0))); + return; } SDNode *IsIntrinsic = N->getOperand(0).getNode(); @@ -816,225 +858,37 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { MVT::i64, MVT::Other, SDValue(Result_2, 0), SDValue(Result_1, 0)); - ReplaceUses(N, Result_3); - return Result_3; + ReplaceNode(N, Result_3); + return; } if (N->getValueType(0) == MVT::i32) { // Convert the zero_extend to Rs = Pd SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl, MVT::i32, SDValue(IsIntrinsic, 0)); - ReplaceUses(N, RsPd); - return RsPd; + ReplaceNode(N, RsPd); + return; } llvm_unreachable("Unexpected value type"); } } - return SelectCode(N); + SelectCode(N); } + // -// Checking for intrinsics circular load/store, and bitreverse load/store -// instrisics in order to select the correct lowered operation. +// Handling intrinsics for circular load and bitreverse load. 
// -SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { - unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); - if (IntNo == Intrinsic::hexagon_circ_ldd || - IntNo == Intrinsic::hexagon_circ_ldw || - IntNo == Intrinsic::hexagon_circ_lduh || - IntNo == Intrinsic::hexagon_circ_ldh || - IntNo == Intrinsic::hexagon_circ_ldub || - IntNo == Intrinsic::hexagon_circ_ldb) { - SDLoc dl(N); - SDValue Chain = N->getOperand(0); - SDValue Base = N->getOperand(2); - SDValue Load = N->getOperand(3); - SDValue ModifierExpr = N->getOperand(4); - SDValue Offset = N->getOperand(5); - - // We need to add the rerurn type for the load. This intrinsic has - // two return types, one for the load and one for the post-increment. - // Only the *_ld instructions push the extra return type, and bump the - // result node operand number correspondingly. - std::vector ResTys; - unsigned opc; - unsigned memsize, align; - MVT MvtSize = MVT::i32; - - if (IntNo == Intrinsic::hexagon_circ_ldd) { - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i64); - opc = Hexagon::L2_loadrd_pci_pseudo; - memsize = 8; - align = 8; - } else if (IntNo == Intrinsic::hexagon_circ_ldw) { - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i32); - opc = Hexagon::L2_loadri_pci_pseudo; - memsize = 4; - align = 4; - } else if (IntNo == Intrinsic::hexagon_circ_ldh) { - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i32); - opc = Hexagon::L2_loadrh_pci_pseudo; - memsize = 2; - align = 2; - MvtSize = MVT::i16; - } else if (IntNo == Intrinsic::hexagon_circ_lduh) { - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i32); - opc = Hexagon::L2_loadruh_pci_pseudo; - memsize = 2; - align = 2; - MvtSize = MVT::i16; - } else if (IntNo == Intrinsic::hexagon_circ_ldb) { - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i32); - opc = Hexagon::L2_loadrb_pci_pseudo; - memsize = 1; - align = 1; - MvtSize = MVT::i8; - } else if (IntNo == Intrinsic::hexagon_circ_ldub) { - ResTys.push_back(MVT::i32); - 
ResTys.push_back(MVT::i32); - opc = Hexagon::L2_loadrub_pci_pseudo; - memsize = 1; - align = 1; - MvtSize = MVT::i8; - } else - llvm_unreachable("no opc"); - - ResTys.push_back(MVT::Other); - - // Copy over the arguments, which are the same mostly. - SmallVector Ops; - Ops.push_back(Base); - Ops.push_back(Load); - Ops.push_back(ModifierExpr); - int32_t Val = cast(Offset.getNode())->getSExtValue(); - Ops.push_back(CurDAG->getTargetConstant(Val, dl, MVT::i32)); - Ops.push_back(Chain); - SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); - - SDValue ST; - MachineMemOperand *Mem = - MF->getMachineMemOperand(MachinePointerInfo(), - MachineMemOperand::MOStore, memsize, align); - if (MvtSize != MVT::i32) - ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, - MvtSize, Mem); - else - ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); - - SDNode* Store = SelectStore(ST.getNode()); - - const SDValue Froms[] = { SDValue(N, 0), - SDValue(N, 1) }; - const SDValue Tos[] = { SDValue(Result, 0), - SDValue(Store, 0) }; - ReplaceUses(Froms, Tos, 2); - return Result; - } - - if (IntNo == Intrinsic::hexagon_brev_ldd || - IntNo == Intrinsic::hexagon_brev_ldw || - IntNo == Intrinsic::hexagon_brev_ldh || - IntNo == Intrinsic::hexagon_brev_lduh || - IntNo == Intrinsic::hexagon_brev_ldb || - IntNo == Intrinsic::hexagon_brev_ldub) { - SDLoc dl(N); - SDValue Chain = N->getOperand(0); - SDValue Base = N->getOperand(2); - SDValue Load = N->getOperand(3); - SDValue ModifierExpr = N->getOperand(4); - - // We need to add the rerurn type for the load. This intrinsic has - // two return types, one for the load and one for the post-increment. 
- std::vector ResTys; - unsigned opc; - unsigned memsize, align; - MVT MvtSize = MVT::i32; - - if (IntNo == Intrinsic::hexagon_brev_ldd) { - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i64); - opc = Hexagon::L2_loadrd_pbr_pseudo; - memsize = 8; - align = 8; - } else if (IntNo == Intrinsic::hexagon_brev_ldw) { - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i32); - opc = Hexagon::L2_loadri_pbr_pseudo; - memsize = 4; - align = 4; - } else if (IntNo == Intrinsic::hexagon_brev_ldh) { - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i32); - opc = Hexagon::L2_loadrh_pbr_pseudo; - memsize = 2; - align = 2; - MvtSize = MVT::i16; - } else if (IntNo == Intrinsic::hexagon_brev_lduh) { - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i32); - opc = Hexagon::L2_loadruh_pbr_pseudo; - memsize = 2; - align = 2; - MvtSize = MVT::i16; - } else if (IntNo == Intrinsic::hexagon_brev_ldb) { - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i32); - opc = Hexagon::L2_loadrb_pbr_pseudo; - memsize = 1; - align = 1; - MvtSize = MVT::i8; - } else if (IntNo == Intrinsic::hexagon_brev_ldub) { - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i32); - opc = Hexagon::L2_loadrub_pbr_pseudo; - memsize = 1; - align = 1; - MvtSize = MVT::i8; - } else - llvm_unreachable("no opc"); - - ResTys.push_back(MVT::Other); - - // Copy over the arguments, which are the same mostly. 
- SmallVector Ops; - Ops.push_back(Base); - Ops.push_back(Load); - Ops.push_back(ModifierExpr); - Ops.push_back(Chain); - SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops); - SDValue ST; - MachineMemOperand *Mem = - MF->getMachineMemOperand(MachinePointerInfo(), - MachineMemOperand::MOStore, memsize, align); - if (MvtSize != MVT::i32) - ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load, - MvtSize, Mem); - else - ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem); - - SDNode* Store = SelectStore(ST.getNode()); - - const SDValue Froms[] = { SDValue(N, 0), - SDValue(N, 1) }; - const SDValue Tos[] = { SDValue(Result, 0), - SDValue(Store, 0) }; - ReplaceUses(Froms, Tos, 2); - return Result; +void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { + if (MachineSDNode *L = LoadInstrForLoadIntrinsic(N)) { + StoreInstrForLoadIntrinsic(L, N); + CurDAG->RemoveDeadNode(N); + return; } - - return SelectCode(N); + SelectCode(N); } -// -// Checking for intrinsics which have predicate registers as operand(s) -// and lowering to the actual intrinsic. -// -SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { +void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { unsigned IID = cast(N->getOperand(0))->getZExtValue(); unsigned Bits; switch (IID) { @@ -1045,42 +899,51 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { Bits = 16; break; default: - return SelectCode(N); + SelectCode(N); + return; } - SDValue const &V = N->getOperand(1); + SDValue V = N->getOperand(1); SDValue U; if (isValueExtension(V, Bits, U)) { SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), - N->getOperand(0), U); - return SelectCode(R.getNode()); + N->getOperand(0), U); + ReplaceNode(N, R.getNode()); + SelectCode(R.getNode()); + return; } - return SelectCode(N); + SelectCode(N); } // // Map floating point constant values. 
// -SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) { +void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) { SDLoc dl(N); ConstantFPSDNode *CN = dyn_cast(N); - APFloat APF = CN->getValueAPF(); + const APFloat &APF = CN->getValueAPF(); if (N->getValueType(0) == MVT::f32) { - return CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32, - CurDAG->getTargetConstantFP(APF.convertToFloat(), dl, MVT::f32)); + ReplaceNode( + N, CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32, + CurDAG->getTargetConstantFP( + APF.convertToFloat(), dl, MVT::f32))); + return; } else if (N->getValueType(0) == MVT::f64) { - return CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64, - CurDAG->getTargetConstantFP(APF.convertToDouble(), dl, MVT::f64)); + ReplaceNode( + N, CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64, + CurDAG->getTargetConstantFP( + APF.convertToDouble(), dl, MVT::f64))); + return; } - return SelectCode(N); + SelectCode(N); } // // Map predicate true (encoded as -1 in LLVM) to a XOR. // -SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) { +void HexagonDAGToDAGISel::SelectConstant(SDNode *N) { SDLoc dl(N); if (N->getValueType(0) == MVT::i1) { SDNode* Result = 0; @@ -1091,28 +954,30 @@ SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) { Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1); } if (Result) { - ReplaceUses(N, Result); - return Result; + ReplaceNode(N, Result); + return; } } - return SelectCode(N); + SelectCode(N); } // // Map add followed by a asr -> asr +=. // -SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) { +void HexagonDAGToDAGISel::SelectAdd(SDNode *N) { SDLoc dl(N); if (N->getValueType(0) != MVT::i32) { - return SelectCode(N); + SelectCode(N); + return; } // Identify nodes of the form: add(asr(...)). 
SDNode* Src1 = N->getOperand(0).getNode(); if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse() || Src1->getValueType(0) != MVT::i32) { - return SelectCode(N); + SelectCode(N); + return; } // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that @@ -1121,9 +986,7 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) { N->getOperand(1), Src1->getOperand(0), Src1->getOperand(1)); - ReplaceUses(N, Result); - - return Result; + ReplaceNode(N, Result); } // @@ -1132,26 +995,32 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) { // OR -> setbit // XOR/FNEG ->toggle_bit. // -SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { +void HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { SDLoc dl(N); EVT ValueVT = N->getValueType(0); // We handle only 32 and 64-bit bit ops. if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 || - ValueVT == MVT::f32 || ValueVT == MVT::f64)) - return SelectCode(N); + ValueVT == MVT::f32 || ValueVT == MVT::f64)) { + SelectCode(N); + return; + } // We handly only fabs and fneg for V5. 
unsigned Opc = N->getOpcode(); - if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps()) - return SelectCode(N); + if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps()) { + SelectCode(N); + return; + } int64_t Val = 0; if (Opc != ISD::FABS && Opc != ISD::FNEG) { if (N->getOperand(1).getOpcode() == ISD::Constant) Val = cast((N)->getOperand(1))->getSExtValue(); - else - return SelectCode(N); + else { + SelectCode(N); + return; + } } if (Opc == ISD::AND) { @@ -1159,8 +1028,10 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { if ((ValueVT == MVT::i32 && isPowerOf2_32(~Val)) || (ValueVT == MVT::i64 && isPowerOf2_64(~Val))) Val = ~Val; - else - return SelectCode(N); + else { + SelectCode(N); + return; + } } // If OR or AND is being fed by shl, srl and, sra don't do this change, @@ -1173,7 +1044,8 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { case ISD::SRA: case ISD::SRL: case ISD::SHL: - return SelectCode(N); + SelectCode(N); + return; } } @@ -1181,8 +1053,10 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { unsigned BitPos = 0; if (Opc != ISD::FABS && Opc != ISD::FNEG) { if ((ValueVT == MVT::i32 && !isPowerOf2_32(Val)) || - (ValueVT == MVT::i64 && !isPowerOf2_64(Val))) - return SelectCode(N); + (ValueVT == MVT::i64 && !isPowerOf2_64(Val))) { + SelectCode(N); + return; + } // Get the bit position. 
BitPos = countTrailingZeros(uint64_t(Val)); @@ -1259,12 +1133,11 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) { } } - ReplaceUses(N, Result); - return Result; + ReplaceNode(N, Result); } -SDNode *HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { +void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { MachineFrameInfo *MFI = MF->getFrameInfo(); const HexagonFrameLowering *HFI = HST->getFrameLowering(); int FX = cast(N)->getIndex(); @@ -1290,61 +1163,91 @@ SDNode *HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { R = CurDAG->getMachineNode(Hexagon::TFR_FIA, DL, MVT::i32, Ops); } - if (N->getHasDebugValue()) - CurDAG->TransferDbgValues(SDValue(N, 0), SDValue(R, 0)); - return R; + ReplaceNode(N, R); } -SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { +void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) { + EVT SVT = N->getOperand(0).getValueType(); + EVT DVT = N->getValueType(0); + if (!SVT.isVector() || !DVT.isVector() || + SVT.getVectorElementType() == MVT::i1 || + DVT.getVectorElementType() == MVT::i1 || + SVT.getSizeInBits() != DVT.getSizeInBits()) { + SelectCode(N); + return; + } + + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N,0), N->getOperand(0)); + CurDAG->RemoveDeadNode(N); +} + + +void HexagonDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) { N->setNodeId(-1); - return nullptr; // Already selected. + return; // Already selected. 
} switch (N->getOpcode()) { case ISD::Constant: - return SelectConstant(N); + SelectConstant(N); + return; case ISD::ConstantFP: - return SelectConstantFP(N); + SelectConstantFP(N); + return; case ISD::FrameIndex: - return SelectFrameIndex(N); + SelectFrameIndex(N); + return; case ISD::ADD: - return SelectAdd(N); + SelectAdd(N); + return; + + case ISD::BITCAST: + SelectBitcast(N); + return; case ISD::SHL: - return SelectSHL(N); + SelectSHL(N); + return; case ISD::LOAD: - return SelectLoad(N); + SelectLoad(N); + return; case ISD::STORE: - return SelectStore(N); + SelectStore(N); + return; case ISD::MUL: - return SelectMul(N); + SelectMul(N); + return; case ISD::AND: case ISD::OR: case ISD::XOR: case ISD::FABS: case ISD::FNEG: - return SelectBitOp(N); + SelectBitOp(N); + return; case ISD::ZERO_EXTEND: - return SelectZeroExtend(N); + SelectZeroExtend(N); + return; case ISD::INTRINSIC_W_CHAIN: - return SelectIntrinsicWChain(N); + SelectIntrinsicWChain(N); + return; case ISD::INTRINSIC_WO_CHAIN: - return SelectIntrinsicWOChain(N); + SelectIntrinsicWOChain(N); + return; } - return SelectCode(N); + SelectCode(N); } bool HexagonDAGToDAGISel:: @@ -1380,7 +1283,7 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() { // Simplify: (or (select c x 0) z) -> (select c (or x z) z) // (or (select c 0 y) z) -> (select c z (or y z)) // This may not be the right thing for all targets, so do it here. 
- for (auto I: Nodes) { + for (auto I : Nodes) { if (I->getOpcode() != ISD::OR) continue; @@ -1392,7 +1295,7 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() { auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool { if (Op.getOpcode() != ISD::SELECT) return false; - return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2)); + return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2)); }; SDValue N0 = I->getOperand(0), N1 = I->getOperand(1); @@ -1417,6 +1320,59 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() { } } } + + // Transform: (store ch addr (add x (add (shl y c) e))) + // to: (store ch addr (add x (shl (add y d) c))), + // where e = (shl d c) for some integer d. + // The purpose of this is to enable generation of loads/stores with + // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift + // value c must be 0, 1 or 2. + for (auto I : Nodes) { + if (I->getOpcode() != ISD::STORE) + continue; + + // I matched: (store ch addr Off) + SDValue Off = I->getOperand(2); + // Off needs to match: (add x (add (shl y c) (shl d c)))) + if (Off.getOpcode() != ISD::ADD) + continue; + // Off matched: (add x T0) + SDValue T0 = Off.getOperand(1); + // T0 needs to match: (add T1 T2): + if (T0.getOpcode() != ISD::ADD) + continue; + // T0 matched: (add T1 T2) + SDValue T1 = T0.getOperand(0); + SDValue T2 = T0.getOperand(1); + // T1 needs to match: (shl y c) + if (T1.getOpcode() != ISD::SHL) + continue; + SDValue C = T1.getOperand(1); + ConstantSDNode *CN = dyn_cast(C.getNode()); + if (CN == nullptr) + continue; + unsigned CV = CN->getZExtValue(); + if (CV > 2) + continue; + // T2 needs to match e, where e = (shl d c) for some d. 
+ ConstantSDNode *EN = dyn_cast(T2.getNode()); + if (EN == nullptr) + continue; + unsigned EV = EN->getZExtValue(); + if (EV % (1 << CV) != 0) + continue; + unsigned DV = EV / (1 << CV); + + // Replace T0 with: (shl (add y d) c) + SDLoc DL = SDLoc(I); + EVT VT = T0.getValueType(); + SDValue D = DAG.getConstant(DV, DL, VT); + // NewAdd = (add y d) + SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D); + // NewShl = (shl NewAdd c) + SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C); + ReplaceNode(T0.getNode(), NewShl.getNode()); + } } void HexagonDAGToDAGISel::EmitFunctionEntryCode() { @@ -1561,3 +1517,26 @@ bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, } return false; } + + +bool HexagonDAGToDAGISel::orIsAdd(const SDNode *N) const { + assert(N->getOpcode() == ISD::OR); + auto *C = dyn_cast(N->getOperand(1)); + assert(C); + + // Detect when "or" is used to add an offset to a stack object. + if (auto *FN = dyn_cast(N->getOperand(0))) { + MachineFrameInfo *MFI = MF->getFrameInfo(); + unsigned A = MFI->getObjectAlignment(FN->getIndex()); + assert(isPowerOf2_32(A)); + int32_t Off = C->getSExtValue(); + // If the alleged offset fits in the zero bits guaranteed by + // the alignment, then this or is really an add. 
+ return (Off >= 0) && (((A-1) & Off) == unsigned(Off)); + } + return false; +} + +bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const { + return N->getAlignment() >= N->getMemoryVT().getStoreSize(); +} diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 01670902e2b0..cdd4c2f8617d 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -389,9 +389,12 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, bool UseHVX = HST.useHVXOps(); bool UseHVXDbl = HST.useHVXDblOps(); - if (LocVT == MVT::i1 || - LocVT == MVT::i8 || - LocVT == MVT::i16) { + if (LocVT == MVT::i1) { + // Return values of type MVT::i1 still need to be assigned to R0, but + // the value type needs to remain i1. LowerCallResult will deal with it, + // but it needs to recognize i1 as the value type. + LocVT = MVT::i32; + } else if (LocVT == MVT::i8 || LocVT == MVT::i16) { LocVT = MVT::i32; ValVT = MVT::i32; if (ArgFlags.isSExt()) @@ -443,9 +446,14 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - if (LocVT == MVT::i32 || LocVT == MVT::f32) { - if (unsigned Reg = State.AllocateReg(Hexagon::R0)) { + // Note that use of registers beyond R1 is not ABI compliant. However there + // are (experimental) IR passes which generate internal functions that + // return structs using these additional registers. 
+ static const uint16_t RegList[] = { Hexagon::R0, Hexagon::R1, + Hexagon::R2, Hexagon::R3, + Hexagon::R4, Hexagon::R5}; + if (unsigned Reg = State.AllocateReg(RegList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -505,15 +513,13 @@ static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT, return false; } -void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) { +void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) { if (VT != PromotedLdStVT) { - setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), - PromotedLdStVT.getSimpleVT()); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType(ISD::LOAD, VT, PromotedLdStVT); - setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::STORE, VT.getSimpleVT(), - PromotedLdStVT.getSimpleVT()); + setOperationAction(ISD::STORE, VT, Promote); + AddPromotedToType(ISD::STORE, VT, PromotedLdStVT); } } @@ -528,10 +534,9 @@ const { /// specified by the specific parameter attribute. The copy will be passed as /// a byval function parameter. Sometimes what we are copying is the end of a /// larger object, the part that does not fit in registers. -static SDValue -CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, - ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - SDLoc dl) { +static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, + SDValue Chain, ISD::ArgFlagsTy Flags, + SelectionDAG &DAG, const SDLoc &dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), @@ -554,11 +559,11 @@ static bool IsHvxVectorType(MVT ty) { // passed by value, the function prototype is modified to return void and // the value is stored in memory pointed by a pointer passed by caller. 
SDValue -HexagonTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, +HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, - SDLoc dl, SelectionDAG &DAG) const { + const SDLoc &dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. SmallVector RVLocs; @@ -608,16 +613,11 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { /// Chain/InFlag are the input chain/flag to use, and that TheCall is the call /// being lowered. Returns a SDNode with the same number of values as the /// ISD::CALL. -SDValue -HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const - SmallVectorImpl &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals, - const SmallVectorImpl &OutVals, - SDValue Callee) const { - +SDValue HexagonTargetLowering::LowerCallResult( + SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals, + const SmallVectorImpl &OutVals, SDValue Callee) const { // Assign locations to each value returned by this call. SmallVector RVLocs; @@ -628,11 +628,30 @@ HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { - Chain = DAG.getCopyFromReg(Chain, dl, - RVLocs[i].getLocReg(), - RVLocs[i].getValVT(), InFlag).getValue(1); - InFlag = Chain.getValue(2); - InVals.push_back(Chain.getValue(0)); + SDValue RetVal; + if (RVLocs[i].getValVT() == MVT::i1) { + // Return values of type MVT::i1 require special handling. The reason + // is that MVT::i1 is associated with the PredRegs register class, but + // values of that type are still returned in R0. 
Generate an explicit + // copy into a predicate register from R0, and treat the value of the + // predicate register as the call result. + auto &MRI = DAG.getMachineFunction().getRegInfo(); + SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), + MVT::i32, InFlag); + // FR0 = (Value, Chain, Glue) + unsigned PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR, + FR0.getValue(0), FR0.getValue(2)); + // TPR = (Chain, Glue) + RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1, + TPR.getValue(1)); + } else { + RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), + RVLocs[i].getValVT(), InFlag); + } + InVals.push_back(RetVal.getValue(0)); + Chain = RetVal.getValue(1); + InFlag = RetVal.getValue(2); } return Chain; @@ -759,8 +778,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } else { MachinePointerInfo LocPI = MachinePointerInfo::getStack( DAG.getMachineFunction(), LocMemOffset); - SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false, - false, 0); + SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI); MemOpChains.push_back(S); } continue; @@ -990,6 +1008,34 @@ HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { return Op; } +// Need to transform ISD::PREFETCH into something that doesn't inherit +// all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and +// SDNPMayStore. +SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op, + SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Addr = Op.getOperand(1); + // Lower it to DCFETCH($reg, #0). A "pat" will try to merge the offset in, + // if the "reg" is fed by an "add". 
+ SDLoc DL(Op); + SDValue Zero = DAG.getConstant(0, DL, MVT::i32); + return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero); +} + +SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); + // Lower the hexagon_prefetch builtin to DCFETCH, as above. + if (IntNo == Intrinsic::hexagon_prefetch) { + SDValue Addr = Op.getOperand(2); + SDLoc DL(Op); + SDValue Zero = DAG.getConstant(0, DL, MVT::i32); + return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero); + } + return SDValue(); +} + SDValue HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { @@ -1016,20 +1062,15 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue AC = DAG.getConstant(A, dl, MVT::i32); SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC); - if (Op.getNode()->getHasDebugValue()) - DAG.TransferDbgValues(Op, AA); + + DAG.ReplaceAllUsesOfValueWith(Op, AA); return AA; } -SDValue -HexagonTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const - SmallVectorImpl &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) -const { +SDValue HexagonTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1144,9 +1185,8 @@ const { // location. 
InVals.push_back(FIN); } else { - InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - MachinePointerInfo(), false, false, - false, 0)); + InVals.push_back( + DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo())); } } } @@ -1174,13 +1214,13 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { HexagonMachineFunctionInfo *QFI = MF.getInfo(); SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32); const Value *SV = cast(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, - Op.getOperand(1), MachinePointerInfo(SV), false, - false, 0); + return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1), + MachinePointerInfo(SV)); } // Creates a SPLAT instruction for a constant value VAL. -static SDValue createSplat(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue Val) { +static SDValue createSplat(SelectionDAG &DAG, const SDLoc &dl, EVT VT, + SDValue Val) { if (VT.getSimpleVT() == MVT::v4i8) return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val); @@ -1301,20 +1341,14 @@ SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Loads[4]; // Base load. Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base, - LoadNode->getPointerInfo(), MVT::i16, - LoadNode->isVolatile(), - LoadNode->isNonTemporal(), - LoadNode->isInvariant(), - Alignment); + LoadNode->getPointerInfo(), MVT::i16, Alignment, + LoadNode->getMemOperand()->getFlags()); // Base+2 load. SDValue Increment = DAG.getConstant(2, DL, MVT::i32); Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, - LoadNode->getPointerInfo(), MVT::i16, - LoadNode->isVolatile(), - LoadNode->isNonTemporal(), - LoadNode->isInvariant(), - Alignment); + LoadNode->getPointerInfo(), MVT::i16, Alignment, + LoadNode->getMemOperand()->getFlags()); // SHL 16, then OR base and base+2. 
SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32); SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount); @@ -1323,20 +1357,14 @@ SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { Increment = DAG.getConstant(4, DL, MVT::i32); Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, - LoadNode->getPointerInfo(), MVT::i16, - LoadNode->isVolatile(), - LoadNode->isNonTemporal(), - LoadNode->isInvariant(), - Alignment); + LoadNode->getPointerInfo(), MVT::i16, Alignment, + LoadNode->getMemOperand()->getFlags()); // Base + 6. Increment = DAG.getConstant(6, DL, MVT::i32); Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, - LoadNode->getPointerInfo(), MVT::i16, - LoadNode->isVolatile(), - LoadNode->isNonTemporal(), - LoadNode->isInvariant(), - Alignment); + LoadNode->getPointerInfo(), MVT::i16, Alignment, + LoadNode->getMemOperand()->getFlags()); // SHL 16, then OR base+4 and base+6. Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount); SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]); @@ -1349,8 +1377,8 @@ SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } else { // Perform default type expansion. Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(), - LoadNode->isVolatile(), LoadNode->isNonTemporal(), - LoadNode->isInvariant(), LoadNode->getAlignment()); + LoadNode->getAlignment(), + LoadNode->getMemOperand()->getFlags()); LoadChain = Result.getValue(1); } } else @@ -1370,15 +1398,15 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT ValTy = Op.getValueType(); ConstantPoolSDNode *CPN = cast(Op); unsigned Align = CPN->getAlignment(); - Reloc::Model RM = HTM.getRelocationModel(); - unsigned char TF = (RM == Reloc::PIC_) ? 
HexagonII::MO_PCREL : 0; + bool IsPositionIndependent = isPositionIndependent(); + unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0; SDValue T; if (CPN->isMachineConstantPoolEntry()) T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, TF); else T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, TF); - if (RM == Reloc::PIC_) + if (IsPositionIndependent) return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T); return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T); } @@ -1387,8 +1415,7 @@ SDValue HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); int Idx = cast(Op)->getIndex(); - Reloc::Model RM = HTM.getRelocationModel(); - if (RM == Reloc::PIC_) { + if (isPositionIndependent()) { SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL); return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T); } @@ -1415,7 +1442,7 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { SDValue Offset = DAG.getConstant(4, dl, MVT::i32); return DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), - MachinePointerInfo(), false, false, false, 0); + MachinePointerInfo()); } // Return LR, which contains the return address. Mark it an implicit live-in. 
@@ -1436,8 +1463,7 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { HRI.getFrameRegister(), VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, - MachinePointerInfo(), - false, false, false, 0); + MachinePointerInfo()); return FrameAddr; } @@ -1461,13 +1487,12 @@ HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const { if (RM == Reloc::Static) { SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); - if (HLOF.IsGlobalInSmallSection(GV, HTM)) + if (HLOF.isGlobalInSmallSection(GV, HTM)) return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA); return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA); } - bool UsePCRel = GV->hasInternalLinkage() || GV->hasHiddenVisibility() || - (GV->hasLocalLinkage() && !isa(GV)); + bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); if (UsePCRel) { SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset, HexagonII::MO_PCREL); @@ -1490,7 +1515,7 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { Reloc::Model RM = HTM.getRelocationModel(); if (RM == Reloc::Static) { - SDValue A = DAG.getTargetBlockAddress(BA, PtrVT); + SDValue A = DAG.getTargetBlockAddress(BA, PtrVT); return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A); } @@ -1507,6 +1532,157 @@ HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym); } +SDValue +HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain, + GlobalAddressSDNode *GA, SDValue *InFlag, EVT PtrVT, unsigned ReturnReg, + unsigned char OperandFlags) const { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDLoc dl(GA); + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, + GA->getValueType(0), + GA->getOffset(), + OperandFlags); + // Create Operands for 
the call.The Operands should have the following: + // 1. Chain SDValue + // 2. Callee which in this case is the Global address value. + // 3. Registers live into the call.In this case its R0, as we + // have just one argument to be passed. + // 4. InFlag if there is any. + // Note: The order is important. + + if (InFlag) { + SDValue Ops[] = { Chain, TGA, + DAG.getRegister(Hexagon::R0, PtrVT), *InFlag }; + Chain = DAG.getNode(HexagonISD::CALLv3, dl, NodeTys, Ops); + } else { + SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT)}; + Chain = DAG.getNode(HexagonISD::CALLv3, dl, NodeTys, Ops); + } + + // Inform MFI that function has calls. + MFI->setAdjustsStack(true); + + SDValue Flag = Chain.getValue(1); + return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); +} + +// +// Lower using the intial executable model for TLS addresses +// +SDValue +HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) const { + SDLoc dl(GA); + int64_t Offset = GA->getOffset(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + + // Get the thread pointer. + SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT); + + bool IsPositionIndependent = isPositionIndependent(); + unsigned char TF = + IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE; + + // First generate the TLS symbol address + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, + Offset, TF); + + SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA); + + if (IsPositionIndependent) { + // Generate the GOT pointer in case of position independent code + SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG); + + // Add the TLS Symbol address to GOT pointer.This gives + // GOT relative relocation for the symbol. + Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym); + } + + // Load the offset value for TLS symbol.This offset is relative to + // thread pointer. 
+ SDValue LoadOffset = + DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo()); + + // Address of the thread local variable is the add of thread + // pointer and the offset of the variable. + return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset); +} + +// +// Lower using the local executable model for TLS addresses +// +SDValue +HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) const { + SDLoc dl(GA); + int64_t Offset = GA->getOffset(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + + // Get the thread pointer. + SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT); + // Generate the TLS symbol address + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset, + HexagonII::MO_TPREL); + SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA); + + // Address of the thread local variable is the add of thread + // pointer and the offset of the variable. + return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym); +} + +// +// Lower using the general dynamic model for TLS addresses +// +SDValue +HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) const { + SDLoc dl(GA); + int64_t Offset = GA->getOffset(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + + // First generate the TLS symbol address + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset, + HexagonII::MO_GDGOT); + + // Then, generate the GOT pointer + SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG); + + // Add the TLS symbol and the GOT pointer + SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA); + SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym); + + // Copy over the argument to R0 + SDValue InFlag; + Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag); + InFlag = Chain.getValue(1); + + return GetDynamicTLSAddr(DAG, Chain, GA, &InFlag, PtrVT, + Hexagon::R0, HexagonII::MO_GDPLT); 
+} + +// +// Lower TLS addresses. +// +// For now for dynamic models, we only support the general dynamic model. +// +SDValue +HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + GlobalAddressSDNode *GA = cast(Op); + + switch (HTM.getTLSModel(GA->getGlobal())) { + case TLSModel::GeneralDynamic: + case TLSModel::LocalDynamic: + return LowerToTLSGeneralDynamicModel(GA, DAG); + case TLSModel::InitialExec: + return LowerToTLSInitialExecModel(GA, DAG); + case TLSModel::LocalExec: + return LowerToTLSLocalExecModel(GA, DAG); + } + llvm_unreachable("Bogus TLS model"); +} + //===----------------------------------------------------------------------===// // TargetLowering Implementation //===----------------------------------------------------------------------===// @@ -1524,9 +1700,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setPrefLoopAlignment(4); setPrefFunctionAlignment(4); setMinFunctionAlignment(2); - setInsertFencesForAtomic(false); setStackPointerRegisterToSaveRestore(HRI.getStackRegister()); + setMaxAtomicSizeInBitsSupported(64); + setMinCmpXchgSizeInBits(32); + if (EnableHexSDNodeSched) setSchedulingPreference(Sched::VLIW); else @@ -1606,8 +1784,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); setOperationAction(ISD::INLINEASM, MVT::Other, Custom); + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); // Custom legalize GlobalAddress nodes into CONST32. 
@@ -1629,9 +1810,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); if (EmitJumpTables) - setMinimumJumpTableEntries(2); - else setMinimumJumpTableEntries(MinimumJumpTables); + else + setMinimumJumpTableEntries(INT_MAX); setOperationAction(ISD::BR_JT, MVT::Other, Expand); // Hexagon has instructions for add/sub with carry. The problem with @@ -1668,10 +1849,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTLZ, MVT::i16, Promote); setOperationAction(ISD::CTTZ, MVT::i8, Promote); setOperationAction(ISD::CTTZ, MVT::i16, Promote); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Promote); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Promote); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote); // In V5, popcount can count # of 1s in i64 but returns i32. // On V4 it will be expanded (set later). 
@@ -1751,8 +1928,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, ISD::SMUL_LOHI, ISD::UMUL_LOHI, // Logical/bit: ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR, - ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::CTLZ_ZERO_UNDEF, - ISD::CTTZ_ZERO_UNDEF, + ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, // Floating point arithmetic/math functions: ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN, @@ -2095,7 +2271,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); EVT VT = Op.getValueType(); - if (V2.getOpcode() == ISD::UNDEF) + if (V2.isUndef()) V2 = V1; if (SVN->isSplat()) { @@ -2113,7 +2289,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { !isa(V1.getOperand(0))) { bool IsScalarToVector = true; for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) - if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { + if (!V1.getOperand(i).isUndef()) { IsScalarToVector = false; break; } @@ -2235,9 +2411,9 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue V0 = BVN->getOperand(0); SDValue V1 = BVN->getOperand(1); - if (V0.getOpcode() == ISD::UNDEF) + if (V0.isUndef()) V0 = DAG.getConstant(0, dl, MVT::i32); - if (V1.getOpcode() == ISD::UNDEF) + if (V1.isUndef()) V1 = DAG.getConstant(0, dl, MVT::i32); ConstantSDNode *C0 = dyn_cast(V0); @@ -2257,7 +2433,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Try to generate a S2_packhl to build v2i16 vectors. if (VT.getSimpleVT() == MVT::v2i16) { for (unsigned i = 0, e = NElts; i != e; ++i) { - if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) + if (BVN->getOperand(i).isUndef()) continue; ConstantSDNode *Cst = dyn_cast(BVN->getOperand(i)); // If the element isn't a constant, it is in a register: @@ -2285,7 +2461,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // combine, const64, etc. are Big Endian. 
unsigned OpIdx = NElts - i - 1; SDValue Operand = BVN->getOperand(OpIdx); - if (Operand.getOpcode() == ISD::UNDEF) + if (Operand.isUndef()) continue; int64_t Val = 0; @@ -2559,8 +2735,7 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT), DAG.getIntPtrConstant(4, dl)); - Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), - false, false, 0); + Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo()); Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset); // Not needed we already use it as explict input to EH_RETURN. @@ -2596,6 +2771,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { // Frame & Return address. Currently unimplemented. case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); @@ -2608,7 +2784,9 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VSELECT: return LowerVSELECT(Op, DAG); case ISD::CTPOP: return LowerCTPOP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG); case ISD::INLINEASM: return LowerINLINEASM(Op, DAG); + case ISD::PREFETCH: return LowerPREFETCH(Op, DAG); } } @@ -2622,18 +2800,17 @@ HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table, return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T); } -MachineBasicBlock * -HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) - const { - switch (MI->getOpcode()) { - case Hexagon::ALLOCA: { - MachineFunction *MF = BB->getParent(); 
- auto *FuncInfo = MF->getInfo(); - FuncInfo->addAllocaAdjustInst(MI); - return BB; - } - default: llvm_unreachable("Unexpected instr type to insert"); +MachineBasicBlock *HexagonTargetLowering::EmitInstrWithCustomInserter( + MachineInstr &MI, MachineBasicBlock *BB) const { + switch (MI.getOpcode()) { + case Hexagon::ALLOCA: { + MachineFunction *MF = BB->getParent(); + auto *FuncInfo = MF->getInfo(); + FuncInfo->addAllocaAdjustInst(&MI); + return BB; + } + default: + llvm_unreachable("Unexpected instr type to insert"); } // switch } @@ -2641,6 +2818,20 @@ HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Inline Assembly Support //===----------------------------------------------------------------------===// +TargetLowering::ConstraintType +HexagonTargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'q': + case 'v': + if (Subtarget.useHVXOps()) + return C_Register; + break; + } + } + return TargetLowering::getConstraintType(Constraint); +} + std::pair HexagonTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { @@ -2814,6 +3005,32 @@ bool llvm::isPositiveHalfWord(SDNode *N) { } } +bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned AS, unsigned Align, bool *Fast) const { + if (Fast) + *Fast = false; + + switch (VT.getSimpleVT().SimpleTy) { + default: + return false; + case MVT::v64i8: + case MVT::v128i8: + case MVT::v256i8: + case MVT::v32i16: + case MVT::v64i16: + case MVT::v128i16: + case MVT::v16i32: + case MVT::v32i32: + case MVT::v64i32: + case MVT::v8i64: + case MVT::v16i64: + case MVT::v32i64: + return true; + } + return false; +} + + std::pair HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const { @@ -2892,3 +3109,10 @@ bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { // Do not expand loads and stores that 
don't exceed 64 bits. return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64; } + +bool HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR( + AtomicCmpXchgInst *AI) const { + const DataLayout &DL = AI->getModule()->getDataLayout(); + unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType()); + return Size >= 4 && Size <= 8; +} diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index bf378b922220..71f67349befe 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -94,7 +94,7 @@ bool isPositiveHalfWord(SDNode *N); bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize) const; - void promoteLdStType(EVT VT, EVT PromotedLdStVT); + void promoteLdStType(MVT VT, MVT PromotedLdStVT); const HexagonTargetMachine &HTM; const HexagonSubtarget &Subtarget; @@ -128,22 +128,37 @@ bool isPositiveHalfWord(SDNode *N); SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, const SmallVectorImpl &Ins, SDLoc dl, - SelectionDAG &DAG, SmallVectorImpl &InVals) const override; + SDValue + LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, + SelectionDAG 
&DAG) const; + SDValue LowerToTLSInitialExecModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; + SDValue LowerToTLSLocalExecModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; + SDValue GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain, + GlobalAddressSDNode *GA, SDValue *InFlag, EVT PtrVT, + unsigned ReturnReg, unsigned char OperandFlags) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, SDLoc dl, - SelectionDAG &DAG, SmallVectorImpl &InVals, - const SmallVectorImpl &OutVals, SDValue Callee) const; + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals, + const SmallVectorImpl &OutVals, + SDValue Callee) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; @@ -153,14 +168,15 @@ bool isPositiveHalfWord(SDNode *N); SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, SDLoc dl, - SelectionDAG &DAG) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &dl, SelectionDAG &DAG) const override; bool mayBeEmittedAsTailCall(CallInst *CI) const override; - MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const override; + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const override; /// If a physical register, this returns the register that receives 
the /// exception address on entry to an EH pad. @@ -192,6 +208,8 @@ bool isPositiveHalfWord(SDNode *N); ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; + std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; @@ -200,13 +218,12 @@ bool isPositiveHalfWord(SDNode *N); getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "o") return InlineAsm::Constraint_o; - else if (ConstraintCode == "v") - return InlineAsm::Constraint_v; return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } // Intrinsics SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. /// The type may be VoidTy, in which case only return true if the addressing @@ -226,6 +243,9 @@ bool isPositiveHalfWord(SDNode *N); /// the immediate into a register. bool isLegalICmpImmediate(int64_t Imm) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, + unsigned Align, bool *Fast) const override; + /// Returns relocation base for the given PIC jumptable. 
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override; @@ -237,6 +257,8 @@ bool isPositiveHalfWord(SDNode *N); Value *Addr, AtomicOrdering Ord) const override; AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { return AtomicExpansionKind::LLSC; diff --git a/lib/Target/Hexagon/HexagonInstrAlias.td b/lib/Target/Hexagon/HexagonInstrAlias.td index 5a1a69b40d4d..9cbeae7c67c8 100644 --- a/lib/Target/Hexagon/HexagonInstrAlias.td +++ b/lib/Target/Hexagon/HexagonInstrAlias.td @@ -460,3 +460,195 @@ def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)", def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)", (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; +// maps if (!Pu) jumpr Rs -> if (!Pu) jumpr:nt Rs +def : InstAlias<"if (!$Pu) jumpr $Rs", + (J2_jumprf PredRegs:$Pu, IntRegs:$Rs)>, + Requires<[HasV60T]>; + +// maps if (Pu) jumpr Rs -> if (Pu) jumpr:nt Rs +def : InstAlias<"if ($Pu) jumpr $Rs", + (J2_jumprt PredRegs:$Pu, IntRegs:$Rs)>, + Requires<[HasV60T]>; + +// maps if (!Pu) jump $r15_2 -> if (!Pu) jump:nt $r15_2 +def : InstAlias<"if (!$Pu) jump $r15_2", + (J2_jumpf PredRegs:$Pu, brtarget:$r15_2)>, + Requires<[HasV60T]>; + +// maps if (Pu) jump $r15_2 -> if (Pu) jump:nt $r15_2 +def : InstAlias<"if ($Pu) jump $r15_2", + (J2_jumpt PredRegs:$Pu, brtarget:$r15_2)>, + Requires<[HasV60T]>; + +def : InstAlias<"if ($src) jump $r15_2", + (J2_jumpt PredRegs:$src, brtarget:$r15_2), 0>; + +def : InstAlias<"if (!$src) jump $r15_2", + (J2_jumpf PredRegs:$src, brtarget:$r15_2), 0>; + +def : InstAlias<"if ($src1) jumpr $src2", + (J2_jumprt PredRegs:$src1, IntRegs:$src2), 0>; + +def : InstAlias<"if (!$src1) jumpr $src2", + (J2_jumprf PredRegs:$src1, IntRegs:$src2), 0>; + +// V6_vassignp: Vector assign mapping. 
+let hasNewValue = 1, opNewValue = 0, isAsmParserOnly = 1 in +def HEXAGON_V6_vassignpair: CVI_VA_DV_Resource < + (outs VecDblRegs:$Vdd), + (ins VecDblRegs:$Vss), + "$Vdd = $Vss">; + +// maps Vd = #0 to Vd = vxor(Vd, Vd) +def : InstAlias<"$Vd = #0", + (V6_vxor VectorRegs:$Vd, VectorRegs:$Vd, VectorRegs:$Vd)>, + Requires<[HasV60T]>; + +// maps Vdd = #0 to Vdd = vsub(Vdd, Vdd) +def : InstAlias<"$Vdd = #0", + (V6_vsubw_dv VecDblRegs:$Vdd, VecDblRegs:$Vdd, VecDblRegs:$Vdd)>, + Requires<[HasV60T]>; + +// maps "$Qd = vcmp.eq($Vu.uh, $Vv.uh)" -> "$Qd = vcmp.eq($Vu.h, $Vv.h)" +def : InstAlias<"$Qd = vcmp.eq($Vu.uh, $Vv.uh)", + (V6_veqh VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>, + Requires<[HasV60T]>; + +// maps "$Qd &= vcmp.eq($Vu.uh, $Vv.uh)" -> "$Qd &= vcmp.eq($Vu.h, $Vv.h)" +def : InstAlias<"$Qd &= vcmp.eq($Vu.uh, $Vv.uh)", + (V6_veqh_and VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>, + Requires<[HasV60T]>; + +// maps "$Qd |= vcmp.eq($Vu.uh, $Vv.uh)" -> "$Qd |= vcmp.eq($Vu.h, $Vv.h)" +def : InstAlias<"$Qd |= vcmp.eq($Vu.uh, $Vv.uh)", + (V6_veqh_or VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>, + Requires<[HasV60T]>; + +// maps "$Qd ^= vcmp.eq($Vu.uh, $Vv.uh)" -> "$Qd ^= vcmp.eq($Vu.h, $Vv.h)" +def : InstAlias<"$Qd ^= vcmp.eq($Vu.uh, $Vv.uh)", + (V6_veqh_xor VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>, + Requires<[HasV60T]>; + +// maps "$Qd = vcmp.eq($Vu.uw, $Vv.uw)" -> "$Qd = vcmp.eq($Vu.w, $Vv.w)" +def : InstAlias<"$Qd = vcmp.eq($Vu.uw, $Vv.uw)", + (V6_veqw VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>, + Requires<[HasV60T]>; + +// maps "$Qd &= vcmp.eq($Vu.uw, $Vv.uw)" -> "$Qd &= vcmp.eq($Vu.w, $Vv.w)" +def : InstAlias<"$Qd &= vcmp.eq($Vu.uw, $Vv.uw)", + (V6_veqw_and VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>, + Requires<[HasV60T]>; + +// maps "$Qd |= vcmp.eq($Vu.uw, $Vv.uw)" -> "$Qd |= vcmp.eq($Vu.w, $Vv.w)" +def : InstAlias<"$Qd |= vcmp.eq($Vu.uw, $Vv.uw)", + (V6_veqh_or VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>, + 
Requires<[HasV60T]>; + +// maps "$Qd ^= vcmp.eq($Vu.uw, $Vv.uw)" -> "$Qd ^= vcmp.eq($Vu.w, $Vv.w)" +def : InstAlias<"$Qd ^= vcmp.eq($Vu.uw, $Vv.uw)", + (V6_veqw_xor VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>, + Requires<[HasV60T]>; + +// maps "$Qd = vcmp.eq($Vu.ub, $Vv.ub)" -> "$Qd = vcmp.eq($Vu.b, $Vv.b)" +def : InstAlias<"$Qd = vcmp.eq($Vu.ub, $Vv.ub)", + (V6_veqb VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>, + Requires<[HasV60T]>; + +// maps "$Qd &= vcmp.eq($Vu.ub, $Vv.ub)" -> "$Qd &= vcmp.eq($Vu.b, $Vv.b)" +def : InstAlias<"$Qd &= vcmp.eq($Vu.ub, $Vv.ub)", + (V6_veqb_and VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>, + Requires<[HasV60T]>; + +// maps "$Qd |= vcmp.eq($Vu.ub, $Vv.ub)" -> "$Qd |= vcmp.eq($Vu.b, $Vv.b)" +def : InstAlias<"$Qd |= vcmp.eq($Vu.ub, $Vv.ub)", + (V6_veqb_or VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>, + Requires<[HasV60T]>; + +// maps "$Qd ^= vcmp.eq($Vu.ub, $Vv.ub)" -> "$Qd ^= vcmp.eq($Vu.b, $Vv.b)" +def : InstAlias<"$Qd ^= vcmp.eq($Vu.ub, $Vv.ub)", + (V6_veqb_xor VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>, + Requires<[HasV60T]>; + +// maps "$Rd.w = vextract($Vu, $Rs)" -> "$Rd = vextract($Vu, $Rs)" +def : InstAlias<"$Rd.w = vextract($Vu, $Rs)", + (V6_extractw IntRegs:$Rd, VectorRegs:$Vu, IntRegs:$Rs)>, + Requires<[HasV60T]>; + +// Mapping from vtrans2x2(Vy32,Vx32,Rt32) to vshuff(Vy32,Vx32,Rt32) +def : InstAlias<"vtrans2x2($Vy, $Vx, $Rt)", + (V6_vshuff VectorRegs:$Vy, VectorRegs:$Vx, IntRegs:$Rt)>, + Requires<[HasV60T]>; + +def : InstAlias<"$Vt=vmem($Rs)", + (V6_vL32b_ai VectorRegs:$Vt, IntRegs:$Rs, 0)>, + Requires<[HasV60T]>; + +def : InstAlias<"$Vt=vmem($Rs):nt", + (V6_vL32b_nt_ai VectorRegs:$Vt, IntRegs:$Rs, 0)>, + Requires<[HasV60T]>; + +def : InstAlias<"vmem($Rs)=$Vt", + (V6_vS32b_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"vmem($Rs):nt=$Vt", + (V6_vS32b_nt_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"vmem($Rs)=$Vt.new", + 
(V6_vS32b_new_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"vmem($Rs):nt=$Vt.new", + (V6_vS32b_nt_new_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"if ($Qv) vmem($Rs)=$Vt", + (V6_vS32b_qpred_ai VecPredRegs:$Qv, IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"if (!$Qv) vmem($Rs)=$Vt", + (V6_vS32b_nqpred_ai VecPredRegs:$Qv, IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"if ($Qv) vmem($Rs):nt=$Vt", + (V6_vS32b_nt_qpred_ai VecPredRegs:$Qv, IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"if (!$Qv) vmem($Rs):nt=$Vt", + (V6_vS32b_nt_nqpred_ai VecPredRegs:$Qv, IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"if ($Pv) vmem($Rs)=$Vt", + (V6_vS32b_pred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"if (!$Pv) vmem($Rs)=$Vt", + (V6_vS32b_npred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"if ($Pv) vmem($Rs):nt=$Vt", + (V6_vS32b_nt_pred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"if (!$Pv) vmem($Rs):nt=$Vt", + (V6_vS32b_nt_npred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"$Vt=vmemu($Rs)", + (V6_vL32Ub_ai VectorRegs:$Vt, IntRegs:$Rs, 0)>, + Requires<[HasV60T]>; + +def : InstAlias<"vmemu($Rs)=$Vt", + (V6_vS32Ub_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"if ($Pv) vmemu($Rs)=$Vt", + (V6_vS32Ub_pred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + +def : InstAlias<"if (!$Pv) vmemu($Rs)=$Vt", + (V6_vS32Ub_npred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>, + Requires<[HasV60T]>; + + diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index 3c5ec1701dc2..0bfb04447f2f 100644 --- 
a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -342,6 +342,10 @@ class JInst pattern = [], string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23> : InstHexagon, OpcodeHexagon; +class JInst_CJUMP_UCJUMP pattern = [], + string cstr = "", InstrItinClass itin = J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT> + : InstHexagon, OpcodeHexagon; + // JR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class JRInst pattern = [], @@ -412,21 +416,11 @@ class STInstPI pattern = [], string cstr = ""> : STInst; -let mayStore = 1 in -class STInst2PI pattern = [], - string cstr = ""> - : STInst; - // Post increment LD Instruction. class LDInstPI pattern = [], string cstr = ""> : LDInst; -let mayLoad = 1 in -class LDInst2PI pattern = [], - string cstr = ""> - : LDInst; - //===----------------------------------------------------------------------===// // V4 Instruction Format Definitions + //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index 2d1dea526eed..e17f71fe4e6a 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -139,7 +139,6 @@ class MEMInst_V4 pattern = [], string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> : MEMInst; -let isCodeGenOnly = 1 in class EXTENDERInst pattern = []> : InstHexagon, OpcodeHexagon; @@ -151,5 +150,11 @@ class SUBInst pattern = [], class CJInst pattern = [], string cstr = ""> + : InstHexagon, + OpcodeHexagon; + +class CJInst_JMPSET pattern = [], + string cstr = ""> : InstHexagon, OpcodeHexagon; + diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index eb3590cb1076..fe9f97d1d5e7 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -24,6 +24,7 @@ #include 
"llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -69,10 +70,10 @@ static cl::opt BranchRelaxAsmLarge("branch-relax-asm-large", /// /// Constants for Hexagon instructions. /// -const int Hexagon_MEMV_OFFSET_MAX_128B = 2047; // #s7 -const int Hexagon_MEMV_OFFSET_MIN_128B = -2048; // #s7 -const int Hexagon_MEMV_OFFSET_MAX = 1023; // #s6 -const int Hexagon_MEMV_OFFSET_MIN = -1024; // #s6 +const int Hexagon_MEMV_OFFSET_MAX_128B = 896; // #s4: -8*128...7*128 +const int Hexagon_MEMV_OFFSET_MIN_128B = -1024; // #s4 +const int Hexagon_MEMV_OFFSET_MAX = 448; // #s4: -8*64...7*64 +const int Hexagon_MEMV_OFFSET_MIN = -512; // #s4 const int Hexagon_MEMW_OFFSET_MAX = 4095; const int Hexagon_MEMW_OFFSET_MIN = -4096; const int Hexagon_MEMD_OFFSET_MAX = 8191; @@ -91,10 +92,10 @@ const int Hexagon_MEMH_AUTOINC_MAX = 14; const int Hexagon_MEMH_AUTOINC_MIN = -16; const int Hexagon_MEMB_AUTOINC_MAX = 7; const int Hexagon_MEMB_AUTOINC_MIN = -8; -const int Hexagon_MEMV_AUTOINC_MAX = 192; -const int Hexagon_MEMV_AUTOINC_MIN = -256; -const int Hexagon_MEMV_AUTOINC_MAX_128B = 384; -const int Hexagon_MEMV_AUTOINC_MIN_128B = -512; +const int Hexagon_MEMV_AUTOINC_MAX = 192; // #s3 +const int Hexagon_MEMV_AUTOINC_MIN = -256; // #s3 +const int Hexagon_MEMV_AUTOINC_MAX_128B = 384; // #s3 +const int Hexagon_MEMV_AUTOINC_MIN_128B = -512; // #s3 // Pin the vtable to this file. void HexagonInstrInfo::anchor() {} @@ -230,22 +231,64 @@ static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. 
-unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, +unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case Hexagon::L2_loadri_io: - case Hexagon::L2_loadrd_io: - case Hexagon::L2_loadrh_io: + switch (MI.getOpcode()) { + default: + break; case Hexagon::L2_loadrb_io: case Hexagon::L2_loadrub_io: - if (MI->getOperand(2).isFI() && - MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { - FrameIndex = MI->getOperand(2).getIndex(); - return MI->getOperand(0).getReg(); - } - break; + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + case Hexagon::L2_loadri_io: + case Hexagon::L2_loadrd_io: + case Hexagon::V6_vL32b_ai: + case Hexagon::V6_vL32b_ai_128B: + case Hexagon::V6_vL32Ub_ai: + case Hexagon::V6_vL32Ub_ai_128B: + case Hexagon::LDriw_pred: + case Hexagon::LDriw_mod: + case Hexagon::LDriq_pred_V6: + case Hexagon::LDriq_pred_vec_V6: + case Hexagon::LDriv_pseudo_V6: + case Hexagon::LDrivv_pseudo_V6: + case Hexagon::LDriq_pred_V6_128B: + case Hexagon::LDriq_pred_vec_V6_128B: + case Hexagon::LDriv_pseudo_V6_128B: + case Hexagon::LDrivv_pseudo_V6_128B: { + const MachineOperand OpFI = MI.getOperand(1); + if (!OpFI.isFI()) + return 0; + const MachineOperand OpOff = MI.getOperand(2); + if (!OpOff.isImm() || OpOff.getImm() != 0) + return 0; + FrameIndex = OpFI.getIndex(); + return MI.getOperand(0).getReg(); + } + + case Hexagon::L2_ploadrbt_io: + case Hexagon::L2_ploadrbf_io: + case Hexagon::L2_ploadrubt_io: + case Hexagon::L2_ploadrubf_io: + case Hexagon::L2_ploadrht_io: + case Hexagon::L2_ploadrhf_io: + case Hexagon::L2_ploadruht_io: + case Hexagon::L2_ploadruhf_io: + case Hexagon::L2_ploadrit_io: + case Hexagon::L2_ploadrif_io: + case Hexagon::L2_ploadrdt_io: + case Hexagon::L2_ploadrdf_io: { + const MachineOperand OpFI = MI.getOperand(2); + if (!OpFI.isFI()) + return 0; + const MachineOperand OpOff = MI.getOperand(3); + if (!OpOff.isImm() 
|| OpOff.getImm() != 0) + return 0; + FrameIndex = OpFI.getIndex(); + return MI.getOperand(0).getReg(); + } } + return 0; } @@ -255,21 +298,58 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, /// the source reg along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. -unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI, +unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; + switch (MI.getOpcode()) { + default: + break; + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerh_io: case Hexagon::S2_storeri_io: case Hexagon::S2_storerd_io: - case Hexagon::S2_storerh_io: - case Hexagon::S2_storerb_io: - if (MI->getOperand(2).isFI() && - MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { - FrameIndex = MI->getOperand(0).getIndex(); - return MI->getOperand(2).getReg(); - } - break; + case Hexagon::V6_vS32b_ai: + case Hexagon::V6_vS32b_ai_128B: + case Hexagon::V6_vS32Ub_ai: + case Hexagon::V6_vS32Ub_ai_128B: + case Hexagon::STriw_pred: + case Hexagon::STriw_mod: + case Hexagon::STriq_pred_V6: + case Hexagon::STriq_pred_vec_V6: + case Hexagon::STriv_pseudo_V6: + case Hexagon::STrivv_pseudo_V6: + case Hexagon::STriq_pred_V6_128B: + case Hexagon::STriq_pred_vec_V6_128B: + case Hexagon::STriv_pseudo_V6_128B: + case Hexagon::STrivv_pseudo_V6_128B: { + const MachineOperand &OpFI = MI.getOperand(0); + if (!OpFI.isFI()) + return 0; + const MachineOperand &OpOff = MI.getOperand(1); + if (!OpOff.isImm() || OpOff.getImm() != 0) + return 0; + FrameIndex = OpFI.getIndex(); + return MI.getOperand(2).getReg(); } + + case Hexagon::S2_pstorerbt_io: + case Hexagon::S2_pstorerbf_io: + case Hexagon::S2_pstorerht_io: + case Hexagon::S2_pstorerhf_io: + case Hexagon::S2_pstorerit_io: + case Hexagon::S2_pstorerif_io: + case 
Hexagon::S2_pstorerdt_io: + case Hexagon::S2_pstorerdf_io: { + const MachineOperand &OpFI = MI.getOperand(1); + if (!OpFI.isFI()) + return 0; + const MachineOperand &OpOff = MI.getOperand(2); + if (!OpOff.isImm() || OpOff.getImm() != 0) + return 0; + FrameIndex = OpFI.getIndex(); + return MI.getOperand(3).getReg(); + } + } + return 0; } @@ -290,7 +370,7 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI, /// Cond[1] = R /// Cond[2] = Imm /// -bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, +bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, @@ -344,7 +424,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return false; --I; } - if (!isUnpredicatedTerminator(&*I)) + if (!isUnpredicatedTerminator(*I)) return false; // Get the last instruction in the block. @@ -352,7 +432,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineInstr *SecondLastInst = nullptr; // Find one more terminator if present. for (;;) { - if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) { + if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(*I)) { if (!SecondLastInst) SecondLastInst = &*I; else @@ -377,6 +457,9 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode); bool LastOpcodeHasNVJump = isNewValueJump(LastInst); + if (LastOpcodeHasJMP_c && !LastInst->getOperand(1).isMBB()) + return true; + // If there is only one terminator instruction, process it. 
if (LastInst && !SecondLastInst) { if (LastOpcode == Hexagon::J2_jump) { @@ -412,6 +495,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode); bool SecLastOpcodeHasNVJump = isNewValueJump(SecondLastInst); if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) { + if (!SecondLastInst->getOperand(1).isMBB()) + return true; TBB = SecondLastInst->getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); Cond.push_back(SecondLastInst->getOperand(0)); @@ -476,10 +561,11 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return Count; } - unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB, - MachineBasicBlock *TBB, MachineBasicBlock *FBB, - ArrayRef Cond, DebugLoc DL) const { + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL) const { unsigned BOpc = Hexagon::J2_jump; unsigned BccOpc = Hexagon::J2_jumpt; assert(validateBranchCond(Cond) && "Invalid branching condition"); @@ -499,9 +585,9 @@ unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB, // into an infinite loop. 
MachineBasicBlock *NewTBB, *NewFBB; SmallVector Cond; - MachineInstr *Term = MBB.getFirstTerminator(); - if (Term != MBB.end() && isPredicated(Term) && - !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) { + auto Term = MBB.getFirstTerminator(); + if (Term != MBB.end() && isPredicated(*Term) && + !analyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) { MachineBasicBlock *NextBB = &*++MBB.getIterator(); if (NewTBB == NextBB) { ReverseBranchCondition(Cond); @@ -592,85 +678,84 @@ bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, return NumInstrs <= 4; } - void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, - unsigned SrcReg, bool KillSrc) const { + MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { auto &HRI = getRegisterInfo(); + unsigned KillFlag = getKillRegState(KillSrc); + if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) { - BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg) + .addReg(SrcReg, KillFlag); return; } if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) { - BuildMI(MBB, I, DL, get(Hexagon::A2_tfrp), DestReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrp), DestReg) + .addReg(SrcReg, KillFlag); return; } if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) { // Map Pd = Ps to Pd = or(Ps, Ps). - BuildMI(MBB, I, DL, get(Hexagon::C2_or), - DestReg).addReg(SrcReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Hexagon::C2_or), DestReg) + .addReg(SrcReg).addReg(SrcReg, KillFlag); return; } - if (Hexagon::DoubleRegsRegClass.contains(DestReg) && + if (Hexagon::CtrRegsRegClass.contains(DestReg) && Hexagon::IntRegsRegClass.contains(SrcReg)) { - // We can have an overlap between single and double reg: r1:0 = r0. 
- if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) { - // r1:0 = r0 - BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg, - Hexagon::subreg_hireg))).addImm(0); - } else { - // r1:0 = r1 or no overlap. - BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), (RI.getSubReg(DestReg, - Hexagon::subreg_loreg))).addReg(SrcReg); - BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg, - Hexagon::subreg_hireg))).addImm(0); - } + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrrcr), DestReg) + .addReg(SrcReg, KillFlag); return; } - if (Hexagon::CtrRegsRegClass.contains(DestReg) && + if (Hexagon::IntRegsRegClass.contains(DestReg) && + Hexagon::CtrRegsRegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrcrr), DestReg) + .addReg(SrcReg, KillFlag); + return; + } + if (Hexagon::ModRegsRegClass.contains(DestReg) && Hexagon::IntRegsRegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(Hexagon::A2_tfrrcr), DestReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Hexagon::A2_tfrrcr), DestReg) + .addReg(SrcReg, KillFlag); return; } if (Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::IntRegsRegClass.contains(DestReg)) { - BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg). - addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg) + .addReg(SrcReg, KillFlag); return; } if (Hexagon::IntRegsRegClass.contains(SrcReg) && Hexagon::PredRegsRegClass.contains(DestReg)) { - BuildMI(MBB, I, DL, get(Hexagon::C2_tfrrp), DestReg). - addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(Hexagon::C2_tfrrp), DestReg) + .addReg(SrcReg, KillFlag); return; } if (Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::IntRegsRegClass.contains(DestReg)) { - BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg). 
- addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg) + .addReg(SrcReg, KillFlag); return; } if (Hexagon::VectorRegsRegClass.contains(SrcReg, DestReg)) { BuildMI(MBB, I, DL, get(Hexagon::V6_vassign), DestReg). - addReg(SrcReg, getKillRegState(KillSrc)); + addReg(SrcReg, KillFlag); return; } if (Hexagon::VecDblRegsRegClass.contains(SrcReg, DestReg)) { - BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg). - addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), - getKillRegState(KillSrc)). - addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), - getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg) + .addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), KillFlag) + .addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), KillFlag); return; } if (Hexagon::VecPredRegsRegClass.contains(SrcReg, DestReg)) { - BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DestReg). - addReg(SrcReg). - addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, KillFlag); return; } if (Hexagon::VecPredRegsRegClass.contains(SrcReg) && - Hexagon::VectorRegsRegClass.contains(DestReg)) { + Hexagon::VectorRegsRegClass.contains(DestReg)) { llvm_unreachable("Unimplemented pred to vec"); return; } @@ -680,14 +765,12 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } if (Hexagon::VecPredRegs128BRegClass.contains(SrcReg, DestReg)) { - BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), - HRI.getSubReg(DestReg, Hexagon::subreg_hireg)). - addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), - getKillRegState(KillSrc)); - BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), - HRI.getSubReg(DestReg, Hexagon::subreg_loreg)). 
- addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), - getKillRegState(KillSrc)); + unsigned DstHi = HRI.getSubReg(DestReg, Hexagon::subreg_hireg); + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DstHi) + .addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), KillFlag); + unsigned DstLo = HRI.getSubReg(DestReg, Hexagon::subreg_loreg); + BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DstLo) + .addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), KillFlag); return; } @@ -708,6 +791,7 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); + unsigned KillFlag = getKillRegState(isKill); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, @@ -715,25 +799,57 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io)) - .addFrameIndex(FI).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Hexagon::S2_storerd_io)) - .addFrameIndex(FI).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Hexagon::STriw_pred)) - .addFrameIndex(FI).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, KillFlag).addMemOperand(MMO); + } else if (Hexagon::ModRegsRegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::STriw_mod)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, KillFlag).addMemOperand(MMO); + } else if 
(Hexagon::VecPredRegs128BRegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::STriq_pred_V6_128B)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, KillFlag).addMemOperand(MMO); + } else if (Hexagon::VecPredRegsRegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::STriq_pred_V6)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, KillFlag).addMemOperand(MMO); + } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) { + DEBUG(dbgs() << "++Generating 128B vector spill"); + BuildMI(MBB, I, DL, get(Hexagon::STriv_pseudo_V6_128B)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, KillFlag).addMemOperand(MMO); + } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) { + DEBUG(dbgs() << "++Generating vector spill"); + BuildMI(MBB, I, DL, get(Hexagon::STriv_pseudo_V6)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, KillFlag).addMemOperand(MMO); + } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) { + DEBUG(dbgs() << "++Generating double vector spill"); + BuildMI(MBB, I, DL, get(Hexagon::STrivv_pseudo_V6)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, KillFlag).addMemOperand(MMO); + } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) { + DEBUG(dbgs() << "++Generating 128B double vector spill"); + BuildMI(MBB, I, DL, get(Hexagon::STrivv_pseudo_V6_128B)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else { llvm_unreachable("Unimplemented"); } } - -void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { +void HexagonInstrInfo::loadRegFromStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, + int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL = MBB.findDebugLoc(I); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); @@ -742,15 +858,41 @@ void 
HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); - if (RC == &Hexagon::IntRegsRegClass) { + + if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); - } else if (RC == &Hexagon::DoubleRegsRegClass) { + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Hexagon::L2_loadrd_io), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); - } else if (RC == &Hexagon::PredRegsRegClass) { + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (Hexagon::ModRegsRegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::LDriw_mod), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (Hexagon::VecPredRegs128BRegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::LDriq_pred_V6_128B), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (Hexagon::VecPredRegsRegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::LDriq_pred_V6), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) { + DEBUG(dbgs() << "++Generating 128B double vector restore"); + BuildMI(MBB, I, DL, get(Hexagon::LDrivv_pseudo_V6_128B), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) { + DEBUG(dbgs() << "++Generating 128B vector restore"); + BuildMI(MBB, I, DL, get(Hexagon::LDriv_pseudo_V6_128B), DestReg) + 
.addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) { + DEBUG(dbgs() << "++Generating vector restore"); + BuildMI(MBB, I, DL, get(Hexagon::LDriv_pseudo_V6), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) { + DEBUG(dbgs() << "++Generating double vector restore"); + BuildMI(MBB, I, DL, get(Hexagon::LDrivv_pseudo_V6), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else { llvm_unreachable("Can't store this register to stack slot"); } @@ -763,48 +905,58 @@ void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, /// into real instructions. The target can edit MI in place, or it can insert /// new instructions and erase MI. The function should return true if /// anything was changed. -bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) - const { +bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { const HexagonRegisterInfo &HRI = getRegisterInfo(); - MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); - MachineBasicBlock &MBB = *MI->getParent(); - DebugLoc DL = MI->getDebugLoc(); - unsigned Opc = MI->getOpcode(); + MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Opc = MI.getOpcode(); const unsigned VecOffset = 1; bool Is128B = false; switch (Opc) { + case TargetOpcode::COPY: { + MachineOperand &MD = MI.getOperand(0); + MachineOperand &MS = MI.getOperand(1); + MachineBasicBlock::iterator MBBI = MI.getIterator(); + if (MD.getReg() != MS.getReg() && !MS.isUndef()) { + copyPhysReg(MBB, MI, DL, MD.getReg(), MS.getReg(), MS.isKill()); + std::prev(MBBI)->copyImplicitOps(*MBB.getParent(), MI); + } + MBB.erase(MBBI); + return true; + } case Hexagon::ALIGNA: - BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg()) + BuildMI(MBB, MI, DL, 
get(Hexagon::A2_andir), MI.getOperand(0).getReg()) .addReg(HRI.getFrameRegister()) - .addImm(-MI->getOperand(1).getImm()); + .addImm(-MI.getOperand(1).getImm()); MBB.erase(MI); return true; case Hexagon::HEXAGON_V6_vassignp_128B: case Hexagon::HEXAGON_V6_vassignp: { - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); if (SrcReg != DstReg) - copyPhysReg(MBB, MI, DL, DstReg, SrcReg, MI->getOperand(1).isKill()); + copyPhysReg(MBB, MI, DL, DstReg, SrcReg, MI.getOperand(1).isKill()); MBB.erase(MI); return true; } case Hexagon::HEXAGON_V6_lo_128B: case Hexagon::HEXAGON_V6_lo: { - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg); - copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI->getOperand(1).isKill()); + copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI.getOperand(1).isKill()); MBB.erase(MI); MRI.clearKillFlags(SrcSubLo); return true; } case Hexagon::HEXAGON_V6_hi_128B: case Hexagon::HEXAGON_V6_hi: { - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg); - copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI->getOperand(1).isKill()); + copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI.getOperand(1).isKill()); MBB.erase(MI); MRI.clearKillFlags(SrcSubHi); return true; @@ -812,24 +964,25 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) case Hexagon::STrivv_indexed_128B: Is128B = true; case Hexagon::STrivv_indexed: { - unsigned SrcReg = MI->getOperand(2).getReg(); + unsigned SrcReg = MI.getOperand(2).getReg(); unsigned SrcSubHi = 
HRI.getSubReg(SrcReg, Hexagon::subreg_hireg); unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg); unsigned NewOpcd = Is128B ? Hexagon::V6_vS32b_ai_128B : Hexagon::V6_vS32b_ai; unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6; - MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpcd)) - .addOperand(MI->getOperand(0)) - .addImm(MI->getOperand(1).getImm()) - .addReg(SrcSubLo) - .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MachineInstr *MI1New = + BuildMI(MBB, MI, DL, get(NewOpcd)) + .addOperand(MI.getOperand(0)) + .addImm(MI.getOperand(1).getImm()) + .addReg(SrcSubLo) + .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI1New->getOperand(0).setIsKill(false); BuildMI(MBB, MI, DL, get(NewOpcd)) - .addOperand(MI->getOperand(0)) - // The Vectors are indexed in multiples of vector size. - .addImm(MI->getOperand(1).getImm()+Offset) - .addReg(SrcSubHi) - .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + .addOperand(MI.getOperand(0)) + // The Vectors are indexed in multiples of vector size. + .addImm(MI.getOperand(1).getImm() + Offset) + .addReg(SrcSubHi) + .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MBB.erase(MI); return true; } @@ -840,35 +993,34 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) case Hexagon::LDrivv_indexed: { unsigned NewOpcd = Is128B ? Hexagon::V6_vL32b_ai_128B : Hexagon::V6_vL32b_ai; - unsigned DstReg = MI->getOperand(0).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); unsigned Offset = Is128B ? 
VecOffset << 7 : VecOffset << 6; MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpcd), HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) - .addOperand(MI->getOperand(1)) - .addImm(MI->getOperand(2).getImm()); + .addOperand(MI.getOperand(1)) + .addImm(MI.getOperand(2).getImm()); MI1New->getOperand(1).setIsKill(false); BuildMI(MBB, MI, DL, get(NewOpcd), HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) - .addOperand(MI->getOperand(1)) + .addOperand(MI.getOperand(1)) // The Vectors are indexed in multiples of vector size. - .addImm(MI->getOperand(2).getImm() + Offset) - .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + .addImm(MI.getOperand(2).getImm() + Offset) + .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MBB.erase(MI); return true; } case Hexagon::LDriv_pseudo_V6_128B: Is128B = true; case Hexagon::LDriv_pseudo_V6: { - unsigned DstReg = MI->getOperand(0).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); unsigned NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B : Hexagon::V6_vL32b_ai; - int32_t Off = MI->getOperand(2).getImm(); - int32_t Idx = Off; + int32_t Off = MI.getOperand(2).getImm(); BuildMI(MBB, MI, DL, get(NewOpc), DstReg) - .addOperand(MI->getOperand(1)) - .addImm(Idx) - .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + .addOperand(MI.getOperand(1)) + .addImm(Off) + .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MBB.erase(MI); return true; } @@ -877,18 +1029,17 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) case Hexagon::STriv_pseudo_V6: { unsigned NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B : Hexagon::V6_vS32b_ai; - int32_t Off = MI->getOperand(1).getImm(); - int32_t Idx = Is128B ? 
(Off >> 7) : (Off >> 6); + int32_t Off = MI.getOperand(1).getImm(); BuildMI(MBB, MI, DL, get(NewOpc)) - .addOperand(MI->getOperand(0)) - .addImm(Idx) - .addOperand(MI->getOperand(2)) - .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + .addOperand(MI.getOperand(0)) + .addImm(Off) + .addOperand(MI.getOperand(2)) + .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MBB.erase(MI); return true; } case Hexagon::TFR_PdTrue: { - unsigned Reg = MI->getOperand(0).getReg(); + unsigned Reg = MI.getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -896,7 +1047,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) return true; } case Hexagon::TFR_PdFalse: { - unsigned Reg = MI->getOperand(0).getReg(); + unsigned Reg = MI.getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -905,18 +1056,20 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) } case Hexagon::VMULW: { // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies. 
- unsigned DstReg = MI->getOperand(0).getReg(); - unsigned Src1Reg = MI->getOperand(1).getReg(); - unsigned Src2Reg = MI->getOperand(2).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned Src1Reg = MI.getOperand(1).getReg(); + unsigned Src2Reg = MI.getOperand(2).getReg(); unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); - BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), - HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) + BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_mpyi), + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) + .addReg(Src1SubHi) .addReg(Src2SubHi); - BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi), - HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) + BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_mpyi), + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) + .addReg(Src1SubLo) .addReg(Src2SubLo); MBB.erase(MI); MRI.clearKillFlags(Src1SubHi); @@ -927,22 +1080,26 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) } case Hexagon::VMULW_ACC: { // Expand 64-bit vector multiply with addition into 2 scalar multiplies. 
- unsigned DstReg = MI->getOperand(0).getReg(); - unsigned Src1Reg = MI->getOperand(1).getReg(); - unsigned Src2Reg = MI->getOperand(2).getReg(); - unsigned Src3Reg = MI->getOperand(3).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned Src1Reg = MI.getOperand(1).getReg(); + unsigned Src2Reg = MI.getOperand(2).getReg(); + unsigned Src3Reg = MI.getOperand(3).getReg(); unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::subreg_hireg); unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::subreg_loreg); - BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), - HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) - .addReg(Src2SubHi).addReg(Src3SubHi); - BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), - HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) - .addReg(Src2SubLo).addReg(Src3SubLo); + BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_maci), + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) + .addReg(Src1SubHi) + .addReg(Src2SubHi) + .addReg(Src3SubHi); + BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_maci), + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) + .addReg(Src1SubLo) + .addReg(Src2SubLo) + .addReg(Src3SubLo); MBB.erase(MI); MRI.clearKillFlags(Src1SubHi); MRI.clearKillFlags(Src1SubLo); @@ -952,16 +1109,58 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) MRI.clearKillFlags(Src3SubLo); return true; } + case Hexagon::Insert4: { + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned Src1Reg = MI.getOperand(1).getReg(); + unsigned Src2Reg = MI.getOperand(2).getReg(); + unsigned Src3Reg = MI.getOperand(3).getReg(); + unsigned Src4Reg = MI.getOperand(4).getReg(); + unsigned Src1RegIsKill = 
getKillRegState(MI.getOperand(1).isKill()); + unsigned Src2RegIsKill = getKillRegState(MI.getOperand(2).isKill()); + unsigned Src3RegIsKill = getKillRegState(MI.getOperand(3).isKill()); + unsigned Src4RegIsKill = getKillRegState(MI.getOperand(4).isKill()); + unsigned DstSubHi = HRI.getSubReg(DstReg, Hexagon::subreg_hireg); + unsigned DstSubLo = HRI.getSubReg(DstReg, Hexagon::subreg_loreg); + BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::S2_insert), + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) + .addReg(DstSubLo) + .addReg(Src1Reg, Src1RegIsKill) + .addImm(16) + .addImm(0); + BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::S2_insert), + HRI.getSubReg(DstReg, Hexagon::subreg_loreg)) + .addReg(DstSubLo) + .addReg(Src2Reg, Src2RegIsKill) + .addImm(16) + .addImm(16); + BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::S2_insert), + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) + .addReg(DstSubHi) + .addReg(Src3Reg, Src3RegIsKill) + .addImm(16) + .addImm(0); + BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::S2_insert), + HRI.getSubReg(DstReg, Hexagon::subreg_hireg)) + .addReg(DstSubHi) + .addReg(Src4Reg, Src4RegIsKill) + .addImm(16) + .addImm(16); + MBB.erase(MI); + MRI.clearKillFlags(DstReg); + MRI.clearKillFlags(DstSubHi); + MRI.clearKillFlags(DstSubLo); + return true; + } case Hexagon::MUX64_rr: { - const MachineOperand &Op0 = MI->getOperand(0); - const MachineOperand &Op1 = MI->getOperand(1); - const MachineOperand &Op2 = MI->getOperand(2); - const MachineOperand &Op3 = MI->getOperand(3); + const MachineOperand &Op0 = MI.getOperand(0); + const MachineOperand &Op1 = MI.getOperand(1); + const MachineOperand &Op2 = MI.getOperand(2); + const MachineOperand &Op3 = MI.getOperand(3); unsigned Rd = Op0.getReg(); unsigned Pu = Op1.getReg(); unsigned Rs = Op2.getReg(); unsigned Rt = Op3.getReg(); - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); unsigned K1 = getKillRegState(Op1.isKill()); unsigned K2 = getKillRegState(Op2.isKill()); unsigned K3 = 
getKillRegState(Op3.isKill()); @@ -976,24 +1175,62 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) MBB.erase(MI); return true; } + case Hexagon::VSelectPseudo_V6: { + const MachineOperand &Op0 = MI.getOperand(0); + const MachineOperand &Op1 = MI.getOperand(1); + const MachineOperand &Op2 = MI.getOperand(2); + const MachineOperand &Op3 = MI.getOperand(3); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vcmov)) + .addOperand(Op0) + .addOperand(Op1) + .addOperand(Op2); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vncmov)) + .addOperand(Op0) + .addOperand(Op1) + .addOperand(Op3); + MBB.erase(MI); + return true; + } + case Hexagon::VSelectDblPseudo_V6: { + MachineOperand &Op0 = MI.getOperand(0); + MachineOperand &Op1 = MI.getOperand(1); + MachineOperand &Op2 = MI.getOperand(2); + MachineOperand &Op3 = MI.getOperand(3); + unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::subreg_loreg); + unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::subreg_hireg); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine)) + .addOperand(Op0) + .addOperand(Op1) + .addReg(SrcHi) + .addReg(SrcLo); + SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::subreg_loreg); + SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::subreg_hireg); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine)) + .addOperand(Op0) + .addOperand(Op1) + .addReg(SrcHi) + .addReg(SrcLo); + MBB.erase(MI); + return true; + } case Hexagon::TCRETURNi: - MI->setDesc(get(Hexagon::J2_jump)); + MI.setDesc(get(Hexagon::J2_jump)); return true; case Hexagon::TCRETURNr: - MI->setDesc(get(Hexagon::J2_jumpr)); + MI.setDesc(get(Hexagon::J2_jumpr)); return true; case Hexagon::TFRI_f: case Hexagon::TFRI_cPt_f: case Hexagon::TFRI_cNotPt_f: { unsigned Opx = (Opc == Hexagon::TFRI_f) ? 
1 : 2; - APFloat FVal = MI->getOperand(Opx).getFPImm()->getValueAPF(); + APFloat FVal = MI.getOperand(Opx).getFPImm()->getValueAPF(); APInt IVal = FVal.bitcastToAPInt(); - MI->RemoveOperand(Opx); + MI.RemoveOperand(Opx); unsigned NewOpc = (Opc == Hexagon::TFRI_f) ? Hexagon::A2_tfrsi : (Opc == Hexagon::TFRI_cPt_f) ? Hexagon::C2_cmoveit : Hexagon::C2_cmoveif; - MI->setDesc(get(NewOpc)); - MI->addOperand(MachineOperand::CreateImm(IVal.getZExtValue())); + MI.setDesc(get(NewOpc)); + MI.addOperand(MachineOperand::CreateImm(IVal.getZExtValue())); return true; } } @@ -1035,20 +1272,20 @@ void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB, // if (!p0.new) R1 = add(R2, R3) // Note: New-value stores are not included here as in the current // implementation, we don't need to check their predicate sense. -bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; +bool HexagonInstrInfo::isPredicated(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; } -bool HexagonInstrInfo::PredicateInstruction(MachineInstr *MI, - ArrayRef Cond) const { +bool HexagonInstrInfo::PredicateInstruction( + MachineInstr &MI, ArrayRef Cond) const { if (Cond.empty() || isNewValueJump(Cond[0].getImm()) || isEndLoopN(Cond[0].getImm())) { - DEBUG(dbgs() << "\nCannot predicate:"; MI->dump();); + DEBUG(dbgs() << "\nCannot predicate:"; MI.dump();); return false; } - int Opc = MI->getOpcode(); + int Opc = MI.getOpcode(); assert (isPredicable(MI) && "Expected predicable instruction"); bool invertJump = predOpcodeHasNot(Cond); @@ -1057,13 +1294,13 @@ bool HexagonInstrInfo::PredicateInstruction(MachineInstr *MI, // plicated manipulations with the operands (handling tied operands, // etc.), build a new temporary instruction, then overwrite MI with it. 
- MachineBasicBlock &B = *MI->getParent(); - DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock &B = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); unsigned PredOpc = getCondOpcode(Opc, invertJump); MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc)); - unsigned NOp = 0, NumOps = MI->getNumOperands(); + unsigned NOp = 0, NumOps = MI.getNumOperands(); while (NOp < NumOps) { - MachineOperand &Op = MI->getOperand(NOp); + MachineOperand &Op = MI.getOperand(NOp); if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) break; T.addOperand(Op); @@ -1076,13 +1313,13 @@ bool HexagonInstrInfo::PredicateInstruction(MachineInstr *MI, assert(GotPredReg); T.addReg(PredReg, PredRegFlags); while (NOp < NumOps) - T.addOperand(MI->getOperand(NOp++)); + T.addOperand(MI.getOperand(NOp++)); - MI->setDesc(get(PredOpc)); - while (unsigned n = MI->getNumOperands()) - MI->RemoveOperand(n-1); + MI.setDesc(get(PredOpc)); + while (unsigned n = MI.getNumOperands()) + MI.RemoveOperand(n-1); for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i) - MI->addOperand(T->getOperand(i)); + MI.addOperand(T->getOperand(i)); MachineBasicBlock::instr_iterator TI = T->getIterator(); B.erase(TI); @@ -1100,11 +1337,11 @@ bool HexagonInstrInfo::SubsumesPredicate(ArrayRef Pred1, } -bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, - std::vector &Pred) const { +bool HexagonInstrInfo::DefinesPredicate( + MachineInstr &MI, std::vector &Pred) const { auto &HRI = getRegisterInfo(); - for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) { - MachineOperand MO = MI->getOperand(oper); + for (unsigned oper = 0; oper < MI.getNumOperands(); ++oper) { + MachineOperand MO = MI.getOperand(oper); if (MO.isReg() && MO.isDef()) { const TargetRegisterClass* RC = HRI.getMinimalPhysRegClass(MO.getReg()); if (RC == &Hexagon::PredRegsRegClass) { @@ -1116,107 +1353,25 @@ bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, return false; } -bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { - 
bool isPred = MI->getDesc().isPredicable(); - - if (!isPred) - return false; - - const int Opc = MI->getOpcode(); - int NumOperands = MI->getNumOperands(); - - // Keep a flag for upto 4 operands in the instructions, to indicate if - // that operand has been constant extended. - bool OpCExtended[4]; - if (NumOperands > 4) - NumOperands = 4; - - for (int i = 0; i < NumOperands; i++) - OpCExtended[i] = (isOperandExtended(MI, i) && isConstExtended(MI)); - - switch(Opc) { - case Hexagon::A2_tfrsi: - return (isOperandExtended(MI, 1) && isConstExtended(MI)) || - isInt<12>(MI->getOperand(1).getImm()); - - case Hexagon::S2_storerd_io: - return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); - - case Hexagon::S2_storeri_io: - case Hexagon::S2_storerinew_io: - return isShiftedUInt<6,2>(MI->getOperand(1).getImm()); - - case Hexagon::S2_storerh_io: - case Hexagon::S2_storerhnew_io: - return isShiftedUInt<6,1>(MI->getOperand(1).getImm()); - - case Hexagon::S2_storerb_io: - case Hexagon::S2_storerbnew_io: - return isUInt<6>(MI->getOperand(1).getImm()); - - case Hexagon::L2_loadrd_io: - return isShiftedUInt<6,3>(MI->getOperand(2).getImm()); - - case Hexagon::L2_loadri_io: - return isShiftedUInt<6,2>(MI->getOperand(2).getImm()); - - case Hexagon::L2_loadrh_io: - case Hexagon::L2_loadruh_io: - return isShiftedUInt<6,1>(MI->getOperand(2).getImm()); - - case Hexagon::L2_loadrb_io: - case Hexagon::L2_loadrub_io: - return isUInt<6>(MI->getOperand(2).getImm()); - - case Hexagon::L2_loadrd_pi: - return isShiftedInt<4,3>(MI->getOperand(3).getImm()); - - case Hexagon::L2_loadri_pi: - return isShiftedInt<4,2>(MI->getOperand(3).getImm()); - - case Hexagon::L2_loadrh_pi: - case Hexagon::L2_loadruh_pi: - return isShiftedInt<4,1>(MI->getOperand(3).getImm()); - - case Hexagon::L2_loadrb_pi: - case Hexagon::L2_loadrub_pi: - return isInt<4>(MI->getOperand(3).getImm()); - - case Hexagon::S4_storeirb_io: - case Hexagon::S4_storeirh_io: - case Hexagon::S4_storeiri_io: - return (OpCExtended[1] || 
isUInt<6>(MI->getOperand(1).getImm())) && - (OpCExtended[2] || isInt<6>(MI->getOperand(2).getImm())); - - case Hexagon::A2_addi: - return isInt<8>(MI->getOperand(2).getImm()); - case Hexagon::A2_aslh: - case Hexagon::A2_asrh: - case Hexagon::A2_sxtb: - case Hexagon::A2_sxth: - case Hexagon::A2_zxtb: - case Hexagon::A2_zxth: - return true; - } - - return true; +bool HexagonInstrInfo::isPredicable(MachineInstr &MI) const { + return MI.getDesc().isPredicable(); } - -bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI, - const MachineBasicBlock *MBB, const MachineFunction &MF) const { +bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { // Debug info is never a scheduling boundary. It's necessary to be explicit // due to the special treatment of IT instructions below, otherwise a // dbg_value followed by an IT will result in the IT instruction being // considered a scheduling hazard, which is wrong. It should be the actual // instruction preceding the dbg_value instruction(s), just like it is // when debug info is not present. - if (MI->isDebugValue()) + if (MI.isDebugValue()) return false; // Throwing call is a boundary. - if (MI->isCall()) { + if (MI.isCall()) { // If any of the block's successors is a landing pad, this could be a // throwing call. for (auto I : MBB->successors()) @@ -1225,15 +1380,15 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI, } // Don't mess around with no return calls. - if (MI->getOpcode() == Hexagon::CALLv3nr) + if (MI.getOpcode() == Hexagon::CALLv3nr) return true; // Terminators and labels can't be scheduled around. 
- if (MI->getDesc().isTerminator() || MI->isPosition()) + if (MI.getDesc().isTerminator() || MI.isPosition()) return true; - if (MI->isInlineAsm() && !ScheduleInlineAsm) - return true; + if (MI.isInlineAsm() && !ScheduleInlineAsm) + return true; return false; } @@ -1286,9 +1441,10 @@ HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( /// \p SrcReg and \p SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction /// can be analyzed. -bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const { - unsigned Opc = MI->getOpcode(); +bool HexagonInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, + unsigned &SrcReg2, int &Mask, + int &Value) const { + unsigned Opc = MI.getOpcode(); // Set mask and the first source register. switch (Opc) { @@ -1307,7 +1463,7 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, case Hexagon::C4_cmpneqi: case Hexagon::C4_cmplteui: case Hexagon::C4_cmpltei: - SrcReg = MI->getOperand(1).getReg(); + SrcReg = MI.getOperand(1).getReg(); Mask = ~0; break; case Hexagon::A4_cmpbeq: @@ -1316,7 +1472,7 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, case Hexagon::A4_cmpbeqi: case Hexagon::A4_cmpbgti: case Hexagon::A4_cmpbgtui: - SrcReg = MI->getOperand(1).getReg(); + SrcReg = MI.getOperand(1).getReg(); Mask = 0xFF; break; case Hexagon::A4_cmpheq: @@ -1325,7 +1481,7 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, case Hexagon::A4_cmpheqi: case Hexagon::A4_cmphgti: case Hexagon::A4_cmphgtui: - SrcReg = MI->getOperand(1).getReg(); + SrcReg = MI.getOperand(1).getReg(); Mask = 0xFFFF; break; } @@ -1347,7 +1503,7 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, case Hexagon::C4_cmpneq: case Hexagon::C4_cmplte: case Hexagon::C4_cmplteu: - SrcReg2 = MI->getOperand(2).getReg(); + SrcReg2 = MI.getOperand(2).getReg(); return true; case 
Hexagon::C2_cmpeqi: @@ -1363,17 +1519,17 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, case Hexagon::A4_cmphgti: case Hexagon::A4_cmphgtui: SrcReg2 = 0; - Value = MI->getOperand(2).getImm(); + Value = MI.getOperand(2).getImm(); return true; } return false; } - unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, unsigned *PredCost) const { - return getInstrTimingClassLatency(ItinData, MI); + const MachineInstr &MI, + unsigned *PredCost) const { + return getInstrTimingClassLatency(ItinData, &MI); } @@ -1388,27 +1544,27 @@ DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( // %R13 = L2_loadri_io %R29, 136; mem:LD4[FixedStack0] // S2_storeri_io %R29, 132, %R1; flags: mem:ST4[FixedStack1] // Currently AA considers the addresses in these instructions to be aliasing. -bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, - MachineInstr *MIb, AliasAnalysis *AA) const { +bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( + MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const { int OffsetA = 0, OffsetB = 0; unsigned SizeA = 0, SizeB = 0; - if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() || - MIa->hasOrderedMemoryRef() || MIa->hasOrderedMemoryRef()) + if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || + MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) return false; // Instructions that are pure loads, not loads and stores like memops are not // dependent. - if (MIa->mayLoad() && !isMemOp(MIa) && MIb->mayLoad() && !isMemOp(MIb)) + if (MIa.mayLoad() && !isMemOp(&MIa) && MIb.mayLoad() && !isMemOp(&MIb)) return true; // Get base, offset, and access size in MIa. - unsigned BaseRegA = getBaseAndOffset(MIa, OffsetA, SizeA); + unsigned BaseRegA = getBaseAndOffset(&MIa, OffsetA, SizeA); if (!BaseRegA || !SizeA) return false; // Get base, offset, and access size in MIb. 
- unsigned BaseRegB = getBaseAndOffset(MIb, OffsetB, SizeB); + unsigned BaseRegB = getBaseAndOffset(&MIb, OffsetB, SizeB); if (!BaseRegB || !SizeB) return false; @@ -1486,13 +1642,13 @@ bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const { bool HexagonInstrInfo::isCondInst(const MachineInstr *MI) const { - return (MI->isBranch() && isPredicated(MI)) || + return (MI->isBranch() && isPredicated(*MI)) || isConditionalTransfer(MI) || isConditionalALU32(MI) || isConditionalLoad(MI) || // Predicated stores which don't have a .new on any operands. - (MI->mayStore() && isPredicated(MI) && !isNewValueStore(MI) && - !isPredicatedNew(MI)); + (MI->mayStore() && isPredicated(*MI) && !isNewValueStore(MI) && + !isPredicatedNew(*MI)); } @@ -1557,7 +1713,7 @@ bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const { // FIXME - Function name and it's functionality don't match. // It should be renamed to hasPredNewOpcode() bool HexagonInstrInfo::isConditionalLoad(const MachineInstr* MI) const { - if (!MI->getDesc().mayLoad() || !isPredicated(MI)) + if (!MI->getDesc().mayLoad() || !isPredicated(*MI)) return false; int PNewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); @@ -1763,8 +1919,7 @@ bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const { // Returns true, if any one of the operands is a dot new // insn, whether it is predicated dot new or register dot new. 
bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const { - if (isNewValueInst(MI) || - (isPredicated(MI) && isPredicatedNew(MI))) + if (isNewValueInst(MI) || (isPredicated(*MI) && isPredicatedNew(*MI))) return true; return false; @@ -2129,8 +2284,8 @@ bool HexagonInstrInfo::isPostIncrement(const MachineInstr* MI) const { } -bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; +bool HexagonInstrInfo::isPredicatedNew(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; assert(isPredicated(MI)); return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } @@ -2143,8 +2298,8 @@ bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { } -bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; +bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr &MI) const { + const uint64_t F = MI.getDesc().TSFlags; return !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask); } @@ -2181,7 +2336,87 @@ bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 || - MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT; + MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT || + MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_PIC || + MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC; +} + +bool HexagonInstrInfo::isSignExtendingLoad(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + // Byte + case Hexagon::L2_loadrb_io: + case Hexagon::L4_loadrb_ur: + case Hexagon::L4_loadrb_ap: + case Hexagon::L2_loadrb_pr: + case Hexagon::L2_loadrb_pbr: + case Hexagon::L2_loadrb_pi: + case Hexagon::L2_loadrb_pci: + case Hexagon::L2_loadrb_pcr: + case Hexagon::L2_loadbsw2_io: + case Hexagon::L4_loadbsw2_ur: + case 
Hexagon::L4_loadbsw2_ap: + case Hexagon::L2_loadbsw2_pr: + case Hexagon::L2_loadbsw2_pbr: + case Hexagon::L2_loadbsw2_pi: + case Hexagon::L2_loadbsw2_pci: + case Hexagon::L2_loadbsw2_pcr: + case Hexagon::L2_loadbsw4_io: + case Hexagon::L4_loadbsw4_ur: + case Hexagon::L4_loadbsw4_ap: + case Hexagon::L2_loadbsw4_pr: + case Hexagon::L2_loadbsw4_pbr: + case Hexagon::L2_loadbsw4_pi: + case Hexagon::L2_loadbsw4_pci: + case Hexagon::L2_loadbsw4_pcr: + case Hexagon::L4_loadrb_rr: + case Hexagon::L2_ploadrbt_io: + case Hexagon::L2_ploadrbt_pi: + case Hexagon::L2_ploadrbf_io: + case Hexagon::L2_ploadrbf_pi: + case Hexagon::L2_ploadrbtnew_io: + case Hexagon::L2_ploadrbfnew_io: + case Hexagon::L4_ploadrbt_rr: + case Hexagon::L4_ploadrbf_rr: + case Hexagon::L4_ploadrbtnew_rr: + case Hexagon::L4_ploadrbfnew_rr: + case Hexagon::L2_ploadrbtnew_pi: + case Hexagon::L2_ploadrbfnew_pi: + case Hexagon::L4_ploadrbt_abs: + case Hexagon::L4_ploadrbf_abs: + case Hexagon::L4_ploadrbtnew_abs: + case Hexagon::L4_ploadrbfnew_abs: + case Hexagon::L2_loadrbgp: + // Half + case Hexagon::L2_loadrh_io: + case Hexagon::L4_loadrh_ur: + case Hexagon::L4_loadrh_ap: + case Hexagon::L2_loadrh_pr: + case Hexagon::L2_loadrh_pbr: + case Hexagon::L2_loadrh_pi: + case Hexagon::L2_loadrh_pci: + case Hexagon::L2_loadrh_pcr: + case Hexagon::L4_loadrh_rr: + case Hexagon::L2_ploadrht_io: + case Hexagon::L2_ploadrht_pi: + case Hexagon::L2_ploadrhf_io: + case Hexagon::L2_ploadrhf_pi: + case Hexagon::L2_ploadrhtnew_io: + case Hexagon::L2_ploadrhfnew_io: + case Hexagon::L4_ploadrht_rr: + case Hexagon::L4_ploadrhf_rr: + case Hexagon::L4_ploadrhtnew_rr: + case Hexagon::L4_ploadrhfnew_rr: + case Hexagon::L2_ploadrhtnew_pi: + case Hexagon::L2_ploadrhfnew_pi: + case Hexagon::L4_ploadrht_abs: + case Hexagon::L4_ploadrhf_abs: + case Hexagon::L4_ploadrhtnew_abs: + case Hexagon::L4_ploadrhfnew_abs: + case Hexagon::L2_loadrhgp: + return true; + default: + return false; + } } @@ -2202,6 +2437,17 @@ bool 
HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const { } +bool HexagonInstrInfo::isTailCall(const MachineInstr *MI) const { + if (!MI->isBranch()) + return false; + + for (auto &Op : MI->operands()) + if (Op.isGlobal() || Op.isSymbol()) + return true; + return false; +} + + // Returns true when SU has a timing class TC1. bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const { unsigned SchedClass = MI->getDesc().getSchedClass(); @@ -2269,6 +2515,28 @@ bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const { } +// Schedule this ASAP. +bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr *MI1, + const MachineInstr *MI2) const { + if (!MI1 || !MI2) + return false; + if (mayBeCurLoad(MI1)) { + // if (result of SU is used in Next) return true; + unsigned DstReg = MI1->getOperand(0).getReg(); + int N = MI2->getNumOperands(); + for (int I = 0; I < N; I++) + if (MI2->getOperand(I).isReg() && DstReg == MI2->getOperand(I).getReg()) + return true; + } + if (mayBeNewStore(MI2)) + if (MI2->getOpcode() == Hexagon::V6_vS32b_pi) + if (MI1->getOperand(0).isReg() && MI2->getOperand(3).isReg() && + MI1->getOperand(0).getReg() == MI2->getOperand(3).getReg()) + return true; + return false; +} + + bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const { if (!MI) return false; @@ -2366,6 +2634,21 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::J2_loop0i: case Hexagon::J2_loop1i: return isUInt<10>(Offset); + + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + return isUInt<6>(Offset); + + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + return isShiftedUInt<6,1>(Offset); + + case Hexagon::S4_storeiri_io: + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + return isShiftedUInt<6,2>(Offset); } if (Extend) @@ -2422,10 +2705,12 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, 
case Hexagon::L4_or_memopb_io : return (0 <= Offset && Offset <= 63); - // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of + // LDriw_xxx and STriw_xxx are pseudo operations, so it has to take offset of // any size. Later pass knows how to handle it. case Hexagon::STriw_pred: case Hexagon::LDriw_pred: + case Hexagon::STriw_mod: + case Hexagon::LDriw_mod: return true; case Hexagon::TFR_FI: @@ -2439,9 +2724,6 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::L2_ploadrubf_io: case Hexagon::S2_pstorerbt_io: case Hexagon::S2_pstorerbf_io: - case Hexagon::S4_storeirb_io: - case Hexagon::S4_storeirbt_io: - case Hexagon::S4_storeirbf_io: return isUInt<6>(Offset); case Hexagon::L2_ploadrht_io: @@ -2450,18 +2732,12 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::L2_ploadruhf_io: case Hexagon::S2_pstorerht_io: case Hexagon::S2_pstorerhf_io: - case Hexagon::S4_storeirh_io: - case Hexagon::S4_storeirht_io: - case Hexagon::S4_storeirhf_io: return isShiftedUInt<6,1>(Offset); case Hexagon::L2_ploadrit_io: case Hexagon::L2_ploadrif_io: case Hexagon::S2_pstorerit_io: case Hexagon::S2_pstorerif_io: - case Hexagon::S4_storeiri_io: - case Hexagon::S4_storeirit_io: - case Hexagon::S4_storeirif_io: return isShiftedUInt<6,2>(Offset); case Hexagon::L2_ploadrdt_io: @@ -2506,6 +2782,94 @@ bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr *ProdMI, return false; } +bool HexagonInstrInfo::isZeroExtendingLoad(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + // Byte + case Hexagon::L2_loadrub_io: + case Hexagon::L4_loadrub_ur: + case Hexagon::L4_loadrub_ap: + case Hexagon::L2_loadrub_pr: + case Hexagon::L2_loadrub_pbr: + case Hexagon::L2_loadrub_pi: + case Hexagon::L2_loadrub_pci: + case Hexagon::L2_loadrub_pcr: + case Hexagon::L2_loadbzw2_io: + case Hexagon::L4_loadbzw2_ur: + case Hexagon::L4_loadbzw2_ap: + case Hexagon::L2_loadbzw2_pr: + case Hexagon::L2_loadbzw2_pbr: + 
case Hexagon::L2_loadbzw2_pi: + case Hexagon::L2_loadbzw2_pci: + case Hexagon::L2_loadbzw2_pcr: + case Hexagon::L2_loadbzw4_io: + case Hexagon::L4_loadbzw4_ur: + case Hexagon::L4_loadbzw4_ap: + case Hexagon::L2_loadbzw4_pr: + case Hexagon::L2_loadbzw4_pbr: + case Hexagon::L2_loadbzw4_pi: + case Hexagon::L2_loadbzw4_pci: + case Hexagon::L2_loadbzw4_pcr: + case Hexagon::L4_loadrub_rr: + case Hexagon::L2_ploadrubt_io: + case Hexagon::L2_ploadrubt_pi: + case Hexagon::L2_ploadrubf_io: + case Hexagon::L2_ploadrubf_pi: + case Hexagon::L2_ploadrubtnew_io: + case Hexagon::L2_ploadrubfnew_io: + case Hexagon::L4_ploadrubt_rr: + case Hexagon::L4_ploadrubf_rr: + case Hexagon::L4_ploadrubtnew_rr: + case Hexagon::L4_ploadrubfnew_rr: + case Hexagon::L2_ploadrubtnew_pi: + case Hexagon::L2_ploadrubfnew_pi: + case Hexagon::L4_ploadrubt_abs: + case Hexagon::L4_ploadrubf_abs: + case Hexagon::L4_ploadrubtnew_abs: + case Hexagon::L4_ploadrubfnew_abs: + case Hexagon::L2_loadrubgp: + // Half + case Hexagon::L2_loadruh_io: + case Hexagon::L4_loadruh_ur: + case Hexagon::L4_loadruh_ap: + case Hexagon::L2_loadruh_pr: + case Hexagon::L2_loadruh_pbr: + case Hexagon::L2_loadruh_pi: + case Hexagon::L2_loadruh_pci: + case Hexagon::L2_loadruh_pcr: + case Hexagon::L4_loadruh_rr: + case Hexagon::L2_ploadruht_io: + case Hexagon::L2_ploadruht_pi: + case Hexagon::L2_ploadruhf_io: + case Hexagon::L2_ploadruhf_pi: + case Hexagon::L2_ploadruhtnew_io: + case Hexagon::L2_ploadruhfnew_io: + case Hexagon::L4_ploadruht_rr: + case Hexagon::L4_ploadruhf_rr: + case Hexagon::L4_ploadruhtnew_rr: + case Hexagon::L4_ploadruhfnew_rr: + case Hexagon::L2_ploadruhtnew_pi: + case Hexagon::L2_ploadruhfnew_pi: + case Hexagon::L4_ploadruht_abs: + case Hexagon::L4_ploadruhf_abs: + case Hexagon::L4_ploadruhtnew_abs: + case Hexagon::L4_ploadruhfnew_abs: + case Hexagon::L2_loadruhgp: + return true; + default: + return false; + } +} + + +// Add latency to instruction. 
+bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr *MI1, + const MachineInstr *MI2) const { + if (isV60VectorInstruction(MI1) && isV60VectorInstruction(MI2)) + if (!isVecUsableNextPacket(MI1, MI2)) + return true; + return false; +} + /// \brief Can these instructions execute at the same time in a bundle. bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First, @@ -2687,6 +3051,11 @@ bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef Cond) const { } +short HexagonInstrInfo::getAbsoluteForm(const MachineInstr *MI) const { + return Hexagon::getAbsoluteForm(MI->getOpcode()); +} + + unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { const uint64_t F = MI->getDesc().TSFlags; return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; @@ -2735,8 +3104,6 @@ bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI, unsigned &BasePos, unsigned &OffsetPos) const { // Deal with memops first. if (isMemOp(MI)) { - assert (MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && - "Bad Memop."); BasePos = 0; OffsetPos = 1; } else if (MI->mayStore()) { @@ -2748,7 +3115,7 @@ bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI, } else return false; - if (isPredicated(MI)) { + if (isPredicated(*MI)) { BasePos++; OffsetPos++; } @@ -2802,7 +3169,7 @@ SmallVector HexagonInstrInfo::getBranchingInstrs( return Jumpers; --I; } - if (!isUnpredicatedTerminator(&*I)) + if (!isUnpredicatedTerminator(*I)) return Jumpers; // Get the last instruction in the block. @@ -2811,7 +3178,7 @@ SmallVector HexagonInstrInfo::getBranchingInstrs( MachineInstr *SecondLastInst = nullptr; // Find one more terminator if present. 
do { - if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) { + if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(*I)) { if (!SecondLastInst) { SecondLastInst = &*I; Jumpers.push_back(SecondLastInst); @@ -2826,6 +3193,23 @@ SmallVector HexagonInstrInfo::getBranchingInstrs( } +short HexagonInstrInfo::getBaseWithLongOffset(short Opcode) const { + if (Opcode < 0) + return -1; + return Hexagon::getBaseWithLongOffset(Opcode); +} + + +short HexagonInstrInfo::getBaseWithLongOffset(const MachineInstr *MI) const { + return Hexagon::getBaseWithLongOffset(MI->getOpcode()); +} + + +short HexagonInstrInfo::getBaseWithRegOffset(const MachineInstr *MI) const { + return Hexagon::getBaseWithRegOffset(MI->getOpcode()); +} + + // Returns Operand Index for the constant extended instruction. unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; @@ -3102,6 +3486,7 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const { return 0; } + // Returns the opcode to use when converting MI, which is a conditional jump, // into a conditional instruction which uses the .new value of the predicate. // We also use branch probabilities to add a hint to the jump. @@ -3353,8 +3738,8 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( case Hexagon::S4_storeirb_io: // memb(Rs+#u4) = #U1 Src1Reg = MI->getOperand(0).getReg(); - if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() && - isUInt<4>(MI->getOperand(1).getImm()) && MI->getOperand(2).isImm() && + if (isIntRegForSubInst(Src1Reg) && + MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm()) && MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm())) return HexagonII::HSIG_S2; break; @@ -3532,7 +3917,7 @@ unsigned HexagonInstrInfo::getInstrTimingClassLatency( // Default to one cycle for no itinerary. 
However, an "empty" itinerary may // still have a MinLatency property, which getStageLatency checks. if (!ItinData) - return getInstrLatency(ItinData, MI); + return getInstrLatency(ItinData, *MI); // Get the latency embedded in the itinerary. If we're not using timing class // latencies or if we using BSB scheduling, then restrict the maximum latency @@ -3737,7 +4122,7 @@ unsigned HexagonInstrInfo::nonDbgBundleSize( assert(BundleHead->isBundle() && "Not a bundle header"); auto MII = BundleHead.getInstrIterator(); // Skip the bundle header. - return nonDbgMICount(++MII, getBundleEnd(BundleHead)); + return nonDbgMICount(++MII, getBundleEnd(*BundleHead)); } @@ -3770,7 +4155,7 @@ bool HexagonInstrInfo::invertAndChangeJumpTarget( --TargetPos; assert((TargetPos >= 0) && MI->getOperand(TargetPos).isMBB()); MI->getOperand(TargetPos).setMBB(NewTarget); - if (EnableBranchPrediction && isPredicatedNew(MI)) { + if (EnableBranchPrediction && isPredicatedNew(*MI)) { NewOpcode = reversePrediction(NewOpcode); } MI->setDesc(get(NewOpcode)); @@ -3826,3 +4211,7 @@ bool HexagonInstrInfo::validateBranchCond(const ArrayRef &Cond) return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); } + +short HexagonInstrInfo::xformRegToImmOffset(const MachineInstr *MI) const { + return Hexagon::xformRegToImmOffset(MI->getOpcode()); +} diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 9530d9f2aa0d..66b6883c955b 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -43,7 +43,7 @@ public: /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. 
- unsigned isLoadFromStackSlot(const MachineInstr *MI, + unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; /// If the specified machine instruction is a direct @@ -51,7 +51,7 @@ public: /// the source reg along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. - unsigned isStoreToStackSlot(const MachineInstr *MI, + unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; /// Analyze the branching code at the end of MBB, returning @@ -79,10 +79,10 @@ public: /// If AllowModify is true, then this routine is allowed to modify the basic /// block (e.g. delete instructions after the unconditional branch). /// - bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const override; + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const override; /// Remove the branching code at the end of the specific MBB. /// This is only invoked in cases where AnalyzeBranch returns success. It @@ -101,7 +101,7 @@ public: /// merging needs to be disabled. unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, - DebugLoc DL) const override; + const DebugLoc &DL) const override; /// Return true if it's profitable to predicate /// instructions with accumulated instruction latency of "NumCycles" @@ -141,9 +141,8 @@ public: /// The source and destination registers may overlap, which may require a /// careful implementation when multiple copy instructions are required for /// large registers. See for example the ARM target. 
- void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; /// Store the specified register of the given register class to the specified @@ -171,7 +170,7 @@ public: /// into real instructions. The target can edit MI in place, or it can insert /// new instructions and erase MI. The function should return true if /// anything was changed. - bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; + bool expandPostRAPseudo(MachineInstr &MI) const override; /// Reverses the branch condition of the specified condition list, /// returning false on success and true if it cannot be reversed. @@ -183,11 +182,11 @@ public: MachineBasicBlock::iterator MI) const override; /// Returns true if the instruction is already predicated. - bool isPredicated(const MachineInstr *MI) const override; + bool isPredicated(const MachineInstr &MI) const override; /// Convert the instruction into a predicated instruction. /// It returns true if the operation was successful. - bool PredicateInstruction(MachineInstr *MI, + bool PredicateInstruction(MachineInstr &MI, ArrayRef Cond) const override; /// Returns true if the first specified predicate @@ -198,17 +197,17 @@ public: /// If the specified instruction defines any predicate /// or condition code register(s) used for predication, returns true as well /// as the definition predicate(s) by reference. - bool DefinesPredicate(MachineInstr *MI, + bool DefinesPredicate(MachineInstr &MI, std::vector &Pred) const override; /// Return true if the specified instruction can be predicated. /// By default, this returns true for every instruction with a /// PredicateOperand. 
- bool isPredicable(MachineInstr *MI) const override; + bool isPredicable(MachineInstr &MI) const override; /// Test if the given instruction should be considered a scheduling boundary. /// This primarily includes labels and terminators. - bool isSchedulingBoundary(const MachineInstr *MI, + bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override; @@ -227,15 +226,14 @@ public: /// in SrcReg and SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction /// can be analyzed. - bool analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, - int &Mask, int &Value) const override; + bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, + unsigned &SrcReg2, int &Mask, int &Value) const override; /// Compute the instruction latency of a given instruction. /// If the instruction has higher cost when predicated, it's returned via /// PredCost. unsigned getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, + const MachineInstr &MI, unsigned *PredCost = 0) const override; /// Create machine specific model for scheduling. @@ -246,10 +244,9 @@ public: // to tell, even without aliasing information, that two MIs access different // memory addresses. This function returns true if two MIs access different // memory addresses and false otherwise. - bool areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb, - AliasAnalysis *AA = nullptr) - const override; - + bool + areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, + AliasAnalysis *AA = nullptr) const override; /// HexagonInstrInfo specifics. 
/// @@ -301,20 +298,24 @@ public: bool isNewValueStore(unsigned Opcode) const; bool isOperandExtended(const MachineInstr *MI, unsigned OperandNum) const; bool isPostIncrement(const MachineInstr* MI) const; - bool isPredicatedNew(const MachineInstr *MI) const; + bool isPredicatedNew(const MachineInstr &MI) const; bool isPredicatedNew(unsigned Opcode) const; - bool isPredicatedTrue(const MachineInstr *MI) const; + bool isPredicatedTrue(const MachineInstr &MI) const; bool isPredicatedTrue(unsigned Opcode) const; bool isPredicated(unsigned Opcode) const; bool isPredicateLate(unsigned Opcode) const; bool isPredictedTaken(unsigned Opcode) const; bool isSaveCalleeSavedRegsCall(const MachineInstr *MI) const; + bool isSignExtendingLoad(const MachineInstr &MI) const; bool isSolo(const MachineInstr* MI) const; bool isSpillPredRegOp(const MachineInstr *MI) const; + bool isTailCall(const MachineInstr *MI) const; bool isTC1(const MachineInstr *MI) const; bool isTC2(const MachineInstr *MI) const; bool isTC2Early(const MachineInstr *MI) const; bool isTC4x(const MachineInstr *MI) const; + bool isToBeScheduledASAP(const MachineInstr *MI1, + const MachineInstr *MI2) const; bool isV60VectorInstruction(const MachineInstr *MI) const; bool isValidAutoIncImm(const EVT VT, const int Offset) const; bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; @@ -322,8 +323,10 @@ public: bool isVecALU(const MachineInstr *MI) const; bool isVecUsableNextPacket(const MachineInstr *ProdMI, const MachineInstr *ConsMI) const; + bool isZeroExtendingLoad(const MachineInstr &MI) const; - + bool addLatencyToSchedule(const MachineInstr *MI1, + const MachineInstr *MI2) const; bool canExecuteInBundle(const MachineInstr *First, const MachineInstr *Second) const; bool hasEHLabel(const MachineBasicBlock *B) const; @@ -341,11 +344,15 @@ public: bool predOpcodeHasNot(ArrayRef Cond) const; + short getAbsoluteForm(const MachineInstr *MI) const; unsigned getAddrMode(const MachineInstr* MI) const; 
unsigned getBaseAndOffset(const MachineInstr *MI, int &Offset, unsigned &AccessSize) const; bool getBaseAndOffsetPosition(const MachineInstr *MI, unsigned &BasePos, unsigned &OffsetPos) const; + short getBaseWithLongOffset(short Opcode) const; + short getBaseWithLongOffset(const MachineInstr *MI) const; + short getBaseWithRegOffset(const MachineInstr *MI) const; SmallVector getBranchingInstrs(MachineBasicBlock& MBB) const; unsigned getCExtOpNum(const MachineInstr *MI) const; HexagonII::CompoundGroup @@ -395,6 +402,7 @@ public: bool reversePredSense(MachineInstr* MI) const; unsigned reversePrediction(unsigned Opcode) const; bool validateBranchCond(const ArrayRef &Cond) const; + short xformRegToImmOffset(const MachineInstr *MI) const; }; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 421403f49724..74dc5ac9a3ad 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -32,6 +32,9 @@ def LoReg: OutPatFrag<(ops node:$Rs), def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>; +def orisadd: PatFrag<(ops node:$Addr, node:$off), + (or node:$Addr, node:$off), [{ return orIsAdd(N); }]>; + // SDNode for converting immediate C to C-1. def DEC_CONST_SIGNED : SDNodeXForm, ImmRegRel, PredNewRel; def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)), (i32 (A2_addi I32:$Rs, imm:$s16))>; +let hasNewValue = 1, hasSideEffects = 0, isPseudo = 1 in +def A2_iconst + : ALU32_ri <(outs IntRegs:$Rd), + (ins s23_2Imm:$s23_2), + "$Rd = iconst(#$s23_2)"> {} + //===----------------------------------------------------------------------===// // Template class used for the following ALU32 instructions. 
// Rd=and(Rs,#s10) @@ -1430,7 +1439,7 @@ class CondStr { string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") "; } class JumpOpcStr { - string S = Mnemonic # !if(Taken, ":t", !if(New, ":nt", "")); + string S = Mnemonic # !if(Taken, ":t", ":nt"); } let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0, @@ -1438,9 +1447,9 @@ let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0, isExtendable = 1, opExtendable = 0, isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2, InputType = "imm" in class T_JMP - : JInst<(outs), (ins brtarget:$dst), + : JInst_CJUMP_UCJUMP<(outs), (ins brtarget:$dst), "jump " # ExtStr # "$dst", - [], "", J_tc_2early_SLOT23> { + [], "", J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT> { bits<24> dst; let IClass = 0b0101; @@ -1453,11 +1462,11 @@ let isBranch = 1, Defs = [PC], hasSideEffects = 0, isPredicated = 1, isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 17, opExtentAlign = 2, InputType = "imm" in class T_JMP_c - : JInst<(outs), (ins PredRegs:$src, brtarget:$dst), + : JInst_CJUMP_UCJUMP<(outs), (ins PredRegs:$src, brtarget:$dst), CondStr<"$src", !if(PredNot,0,1), isPredNew>.S # JumpOpcStr<"jump", isPredNew, isTak>.S # " " # ExtStr # "$dst", - [], "", J_tc_2early_SLOT23>, ImmRegRel { + [], "", J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT>, ImmRegRel { let isTaken = isTak; let isPredicatedFalse = PredNot; let isPredicatedNew = isPredNew; @@ -1576,19 +1585,31 @@ let Defs = VolatileV3.Regs in { let isTerminator = 1, hasSideEffects = 0 in { defm J2_jump : JMP_base<"JMP", "">, PredNewRel; - // Deal with explicit assembly - // - never extened a jump #, always extend a jump ## - let isAsmParserOnly = 1 in { - defm J2_jump_ext : JMP_base<"JMP", "##">; - defm J2_jump_noext : JMP_base<"JMP", "#">; - } - defm J2_jumpr : JMPR_base<"JMPr">, PredNewRel; let isReturn = 1, isCodeGenOnly = 1 in defm JMPret : JMPR_base<"JMPret">, PredNewRel; } +let validSubTargets = HasV60SubT in +multiclass JMPpt_base { + let 
BaseOpcode = BaseOp in { + def tpt : T_JMP_c <0, 0, 1, "">; // Predicate true - taken + def fpt : T_JMP_c <1, 0, 1, "">; // Predicate false - taken + } +} + +let validSubTargets = HasV60SubT in +multiclass JMPRpt_base { + let BaseOpcode = BaseOp in { + def tpt : T_JMPr_c<0, 0, 1>; // predicate true - taken + def fpt : T_JMPr_c<1, 0, 1>; // predicate false - taken + } +} + +defm J2_jumpr : JMPRpt_base<"JMPr">; +defm J2_jump : JMPpt_base<"JMP">; + def: Pat<(br bb:$dst), (J2_jump brtarget:$dst)>; def: Pat<(retflag), @@ -1769,6 +1790,8 @@ multiclass Loadx_pat; def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), (VT (MI AddrFI:$fi, imm:$Off))>; + def: Pat<(VT (Load (orisadd (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))), (VT (MI IntRegs:$Rs, imm:$Off))>; def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>; @@ -2010,6 +2033,12 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, def LDriw_pred : LDInst<(outs PredRegs:$dst), (ins IntRegs:$addr, s11_2Ext:$off), ".error \"should not emit\"", []>; +// Load modifier. +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in +def LDriw_mod : LDInst<(outs ModRegs:$dst), + (ins IntRegs:$addr, s11_2Ext:$off), + ".error \"should not emit\"", []>; let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0 in def L2_deallocframe : LDInst<(outs), (ins), @@ -2023,7 +2052,7 @@ let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0 in } // Load / Post increment circular addressing mode. 
-let Uses = [CS], hasSideEffects = 0 in +let Uses = [CS], hasSideEffects = 0, addrMode = PostInc in class T_load_pcr MajOp> : LDInst <(outs RC:$dst, IntRegs:$_dst_), (ins IntRegs:$Rz, ModRegs:$Mu), @@ -2070,7 +2099,7 @@ let accessSize = DoubleWordAccess in def L2_loadrd_pcr : T_load_pcr <"memd", DoubleRegs, 0b1110>; // Load / Post increment circular addressing mode. -let Uses = [CS], hasSideEffects = 0 in +let Uses = [CS], hasSideEffects = 0, addrMode = PostInc in class T_loadalign_pcr MajOp, MemAccessSize AccessSz > : LDInst <(outs DoubleRegs:$dst, IntRegs:$_dst_), (ins DoubleRegs:$_src_, IntRegs:$Rz, ModRegs:$Mu), @@ -2099,7 +2128,7 @@ def L2_loadalignh_pcr : T_loadalign_pcr <"memh_fifo", 0b0010, HalfWordAccess>; //===----------------------------------------------------------------------===// // Circular loads with immediate offset. //===----------------------------------------------------------------------===// -let Uses = [CS], mayLoad = 1, hasSideEffects = 0 in +let Uses = [CS], mayLoad = 1, hasSideEffects = 0, addrMode = PostInc in class T_load_pci MajOp> : LDInstPI<(outs RC:$dst, IntRegs:$_dst_), @@ -2155,28 +2184,6 @@ let accessSize = WordAccess, hasNewValue = 0 in { let accessSize = DoubleWordAccess, hasNewValue = 0 in def L2_loadrd_pci : T_load_pci <"memd", DoubleRegs, s4_3Imm, 0b1110>; -//===----------------------------------------------------------------------===// -// Circular loads - Pseudo -// -// Please note that the input operand order in the pseudo instructions -// doesn't match with the real instructions. Pseudo instructions operand -// order should mimics the ordering in the intrinsics. Also, 'src2' doesn't -// appear in the AsmString because it's same as 'dst'. 
-//===----------------------------------------------------------------------===// -let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in -class T_load_pci_pseudo - : LDInstPI<(outs IntRegs:$_dst_, RC:$dst), - (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4Imm:$src4), - ".error \"$dst = "#opc#"($src1++#$src4:circ($src3))\"", - [], "$src1 = $_dst_">; - -def L2_loadrb_pci_pseudo : T_load_pci_pseudo <"memb", IntRegs>; -def L2_loadrub_pci_pseudo : T_load_pci_pseudo <"memub", IntRegs>; -def L2_loadrh_pci_pseudo : T_load_pci_pseudo <"memh", IntRegs>; -def L2_loadruh_pci_pseudo : T_load_pci_pseudo <"memuh", IntRegs>; -def L2_loadri_pci_pseudo : T_load_pci_pseudo <"memw", IntRegs>; -def L2_loadrd_pci_pseudo : T_load_pci_pseudo <"memd", DoubleRegs>; - // TODO: memb_fifo and memh_fifo must take destination register as input. // One-off circ loads - not enough in common to break into a class. @@ -2233,7 +2240,7 @@ def S4_stored_locked : T_store_locked <"memd_locked", DoubleRegs>; //===----------------------------------------------------------------------===// // Bit-reversed loads with auto-increment register //===----------------------------------------------------------------------===// -let hasSideEffects = 0 in +let hasSideEffects = 0, addrMode = PostInc in class T_load_pbr majOp> : LDInst @@ -2277,26 +2284,6 @@ def L2_loadalignb_pbr :T_load_pbr <"memb_fifo", DoubleRegs, ByteAccess, 0b0100>; def L2_loadalignh_pbr :T_load_pbr <"memh_fifo", DoubleRegs, HalfWordAccess, 0b0010>; -//===----------------------------------------------------------------------===// -// Bit-reversed loads - Pseudo -// -// Please note that 'src2' doesn't appear in the AsmString because -// it's same as 'dst'. 
-//===----------------------------------------------------------------------===// -let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0, isPseudo = 1 in -class T_load_pbr_pseudo - : LDInstPI<(outs IntRegs:$_dst_, RC:$dst), - (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), - ".error \"$dst = "#opc#"($src1++$src3:brev)\"", - [], "$src1 = $_dst_">; - -def L2_loadrb_pbr_pseudo : T_load_pbr_pseudo <"memb", IntRegs>; -def L2_loadrub_pbr_pseudo : T_load_pbr_pseudo <"memub", IntRegs>; -def L2_loadrh_pbr_pseudo : T_load_pbr_pseudo <"memh", IntRegs>; -def L2_loadruh_pbr_pseudo : T_load_pbr_pseudo <"memuh", IntRegs>; -def L2_loadri_pbr_pseudo : T_load_pbr_pseudo <"memw", IntRegs>; -def L2_loadrd_pbr_pseudo : T_load_pbr_pseudo <"memd", DoubleRegs>; - //===----------------------------------------------------------------------===// // LD - //===----------------------------------------------------------------------===// @@ -3558,14 +3545,20 @@ let addrMode = BaseImmOffset, InputType = "imm" in { // AddedComplexity) to the individual patterns. 
class Storex_fi_pat : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; -class Storex_fi_add_pat - : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; -class Storex_add_pat - : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; +multiclass Storex_fi_add_pat { + def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; + def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; +} +multiclass Storex_add_pat { + def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; + def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; +} class Storex_simple_pat : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)), (MI IntRegs:$Rs, 0, Value:$Rt)>; @@ -3577,14 +3570,20 @@ class Storexm_fi_pat : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; -class Storexm_fi_add_pat - : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; -class Storexm_add_pat - : Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; +multiclass Storexm_fi_add_pat { + def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; + def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; +} +multiclass Storexm_add_pat { + def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; + def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; +} class Storexm_simple_pat : Pat<(Store Value:$Rt, (i32 
IntRegs:$Rs)), @@ -3592,16 +3591,16 @@ class Storexm_simple_pat { - def: Storex_fi_pat ; - def: Storex_fi_add_pat ; - def: Storex_add_pat ; + def: Storex_fi_pat ; + defm: Storex_fi_add_pat ; + defm: Storex_add_pat ; } multiclass Storexm_pat { - def: Storexm_fi_pat ; - def: Storexm_fi_add_pat ; - def: Storexm_add_pat ; + def: Storexm_fi_pat ; + defm: Storexm_fi_add_pat ; + defm: Storexm_add_pat ; } // Regular stores in the DAG have two operands: value and address. @@ -3610,7 +3609,8 @@ multiclass Storexm_pat - : PatFrag<(ops node:$val, node:$ptr), F.Fragment>; + : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, + F.OperandTransform>; let AddedComplexity = 20 in { defm: Storex_pat; @@ -3651,6 +3651,12 @@ let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, def STriw_pred : STInst<(outs), (ins IntRegs:$addr, s11_2Ext:$off, PredRegs:$src1), ".error \"should not emit\"", []>; +// Store modifier. +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, + isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in +def STriw_mod : STInst<(outs), + (ins IntRegs:$addr, s11_2Ext:$off, ModRegs:$src1), + ".error \"should not emit\"", []>; // S2_allocframe: Allocate stack frame. let Defs = [R29, R30], Uses = [R29, R31, R30], @@ -3668,7 +3674,7 @@ def S2_allocframe: ST0Inst < // S2_storer[bhwdf]_pci: Store byte/half/word/double. 
// S2_storer[bhwdf]_pci -> S2_storerbnew_pci -let Uses = [CS] in +let Uses = [CS], addrMode = PostInc in class T_store_pci MajOp, MemAccessSize AlignSize, string RegSrc = "Rt"> @@ -3711,7 +3717,8 @@ def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100, def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110, DoubleWordAccess>; -let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in +let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4, + addrMode = PostInc in class T_storenew_pci MajOp, MemAccessSize AlignSize> : NVInst < (outs IntRegs:$_dst_), @@ -3744,30 +3751,10 @@ def S2_storerbnew_pci : T_storenew_pci <"memb", s4_0Imm, 0b00, ByteAccess>; def S2_storerhnew_pci : T_storenew_pci <"memh", s4_1Imm, 0b01, HalfWordAccess>; def S2_storerinew_pci : T_storenew_pci <"memw", s4_2Imm, 0b10, WordAccess>; -//===----------------------------------------------------------------------===// -// Circular stores - Pseudo -// -// Please note that the input operand order in the pseudo instructions -// doesn't match with the real instructions. Pseudo instructions operand -// order should mimics the ordering in the intrinsics. 
-//===----------------------------------------------------------------------===// -let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in -class T_store_pci_pseudo - : STInstPI<(outs IntRegs:$_dst_), - (ins IntRegs:$src1, RC:$src2, IntRegs:$src3, s4Imm:$src4), - ".error \""#opc#"($src1++#$src4:circ($src3)) = $src2\"", - [], "$_dst_ = $src1">; - -def S2_storerb_pci_pseudo : T_store_pci_pseudo <"memb", IntRegs>; -def S2_storerh_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>; -def S2_storerf_pci_pseudo : T_store_pci_pseudo <"memh", IntRegs>; -def S2_storeri_pci_pseudo : T_store_pci_pseudo <"memw", IntRegs>; -def S2_storerd_pci_pseudo : T_store_pci_pseudo <"memd", DoubleRegs>; - //===----------------------------------------------------------------------===// // Circular stores with auto-increment register //===----------------------------------------------------------------------===// -let Uses = [CS] in +let Uses = [CS], addrMode = PostInc in class T_store_pcr MajOp, MemAccessSize AlignSize, string RegSrc = "Rt"> : STInst <(outs IntRegs:$_dst_), @@ -3803,7 +3790,8 @@ def S2_storerf_pcr : T_store_pcr<"memh", IntRegs, 0b1011, //===----------------------------------------------------------------------===// // Circular .new stores with auto-increment register //===----------------------------------------------------------------------===// -let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in +let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3, + addrMode = PostInc in class T_storenew_pcr MajOp, MemAccessSize AlignSize> : NVInst <(outs IntRegs:$_dst_), @@ -3834,7 +3822,7 @@ def S2_storerinew_pcr : T_storenew_pcr <"memw", 0b10, WordAccess>; //===----------------------------------------------------------------------===// // Bit-reversed stores with auto-increment register //===----------------------------------------------------------------------===// -let hasSideEffects = 0 in +let hasSideEffects = 0, 
addrMode = PostInc in class T_store_pbr majOp, bit isHalf = 0> @@ -3879,7 +3867,7 @@ def S2_storerd_pbr : T_store_pbr<"memd", DoubleRegs, DoubleWordAccess, 0b110>; // Bit-reversed .new stores with auto-increment register //===----------------------------------------------------------------------===// let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3, - hasSideEffects = 0 in + hasSideEffects = 0, addrMode = PostInc in class T_storenew_pbr majOp> : NVInst <(outs IntRegs:$_dst_), (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt), @@ -3909,26 +3897,6 @@ def S2_storerhnew_pbr : T_storenew_pbr<"memh", HalfWordAccess, 0b01>; let BaseOpcode = "S2_storeri_pbr" in def S2_storerinew_pbr : T_storenew_pbr<"memw", WordAccess, 0b10>; -//===----------------------------------------------------------------------===// -// Bit-reversed stores - Pseudo -// -// Please note that the input operand order in the pseudo instructions -// doesn't match with the real instructions. Pseudo instructions operand -// order should mimics the ordering in the intrinsics. 
-//===----------------------------------------------------------------------===// -let isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0, isPseudo = 1 in -class T_store_pbr_pseudo - : STInstPI<(outs IntRegs:$_dst_), - (ins IntRegs:$src1, RC:$src2, IntRegs:$src3), - ".error \""#opc#"($src1++$src3:brev) = $src2\"", - [], "$_dst_ = $src1">; - -def S2_storerb_pbr_pseudo : T_store_pbr_pseudo <"memb", IntRegs>; -def S2_storerh_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>; -def S2_storeri_pbr_pseudo : T_store_pbr_pseudo <"memw", IntRegs>; -def S2_storerf_pbr_pseudo : T_store_pbr_pseudo <"memh", IntRegs>; -def S2_storerd_pbr_pseudo : T_store_pbr_pseudo <"memd", DoubleRegs>; - //===----------------------------------------------------------------------===// // ST - //===----------------------------------------------------------------------===// @@ -4201,22 +4169,16 @@ def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>; // Count leading zeros. def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>; def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; -def: Pat<(i32 (ctlz_zero_undef I32:$Rs)), (S2_cl0 I32:$Rs)>; -def: Pat<(i32 (trunc (ctlz_zero_undef I64:$Rss))), (S2_cl0p I64:$Rss)>; // Count trailing zeros: 32-bit. def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>; -def: Pat<(i32 (cttz_zero_undef I32:$Rs)), (S2_ct0 I32:$Rs)>; // Count leading ones. def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>; def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; -def: Pat<(i32 (ctlz_zero_undef (not I32:$Rs))), (S2_cl1 I32:$Rs)>; -def: Pat<(i32 (trunc (ctlz_zero_undef (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; // Count trailing ones: 32-bit. def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>; -def: Pat<(i32 (cttz_zero_undef (not I32:$Rs))), (S2_ct1 I32:$Rs)>; // The 64-bit counts leading/trailing are defined in HexagonInstrInfoV4.td. 
@@ -4561,6 +4523,9 @@ let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, (ins IntRegs:$Rs, IntRegs:$fi, s32Imm:$off), "">; } +def: Pat<(i32 (orisadd (i32 AddrFI:$Rs), s32ImmPred:$off)), + (i32 (TFR_FI (i32 AddrFI:$Rs), s32ImmPred:$off))>; + //===----------------------------------------------------------------------===// // CRUSER - Type. //===----------------------------------------------------------------------===// @@ -4779,10 +4744,10 @@ def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; // HI/LO Instructions let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -class REG_IMMED MajOp, bit MinOp> +class REG_IMMED MajOp, bit MinOp> : ALU32_ri<(outs IntRegs:$dst), - (ins i32imm:$imm_value), - "$dst"#RegHalf#" = #"#Op#"($imm_value)", []> { + (ins u16Imm:$imm_value), + "$dst"#RegHalf#" = $imm_value", []> { bits<5> dst; bits<32> imm_value; let IClass = 0b0111; @@ -4791,15 +4756,13 @@ class REG_IMMED MajOp, bit MinOp> let Inst{26-24} = MajOp; let Inst{21} = MinOp; let Inst{20-16} = dst; - let Inst{23-22} = !if (!eq(Op, "LO"), imm_value{15-14}, imm_value{31-30}); - let Inst{13-0} = !if (!eq(Op, "LO"), imm_value{13-0}, imm_value{29-16}); + let Inst{23-22} = imm_value{15-14}; + let Inst{13-0} = imm_value{13-0}; } let isAsmParserOnly = 1 in { - def LO : REG_IMMED<".l", "LO", 0b0, 0b001, 0b1>; - def LO_H : REG_IMMED<".l", "HI", 0b0, 0b001, 0b1>; - def HI : REG_IMMED<".h", "HI", 0b0, 0b010, 0b1>; - def HI_L : REG_IMMED<".h", "LO", 0b0, 0b010, 0b1>; + def LO : REG_IMMED<".l", 0b0, 0b001, 0b1>; + def HI : REG_IMMED<".h", 0b0, 0b010, 0b1>; } let isMoveImm = 1, isCodeGenOnly = 1 in @@ -4866,7 +4829,7 @@ def TFR_PdTrue : SInst<(outs PredRegs:$dst), (ins), "", let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, isCodeGenOnly = 1 in -def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), "$dst = xor($dst, $dst)", +def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), "", [(set (i1 
PredRegs:$dst), 0)]>; // Pseudo instructions. diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td index 84d035da451b..9024a43aa7eb 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV3.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -21,25 +21,26 @@ def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall, // J + //===----------------------------------------------------------------------===// // Call subroutine. -let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicable = 1, +let isCall = 1, hasSideEffects = 1, isPredicable = 1, isExtended = 0, isExtendable = 1, opExtendable = 0, isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in -class T_Call +class T_Call : JInst<(outs), (ins calltarget:$dst), "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> { let BaseOpcode = "call"; bits<24> dst; + let Defs = !if (CSR, VolatileV3.Regs, []); let IClass = 0b0101; let Inst{27-25} = 0b101; let Inst{24-16,13-1} = dst{23-2}; let Inst{0} = 0b0; } -let isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs, isPredicated = 1, +let isCall = 1, hasSideEffects = 1, isPredicated = 1, isExtended = 0, isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 17, opExtentAlign = 2 in -class T_CallPred +class T_CallPred : JInst<(outs), (ins PredRegs:$Pu, calltarget:$dst), CondStr<"$Pu", IfTrue, 0>.S # "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> { @@ -48,6 +49,7 @@ class T_CallPred bits<2> Pu; bits<17> dst; + let Defs = !if (CSR, VolatileV3.Regs, []); let IClass = 0b0101; let Inst{27-24} = 0b1101; let Inst{23-22,20-16,13,7-1} = dst{16-2}; @@ -56,16 +58,19 @@ class T_CallPred let Inst{9-8} = Pu; } -multiclass T_Calls { - def NAME : T_Call; - def t : T_CallPred<1, ExtStr>; - def f : T_CallPred<0, ExtStr>; +multiclass T_Calls { + def NAME : T_Call; + def t : T_CallPred; + def f : T_CallPred; } -defm J2_call: T_Calls<"">, PredRel; +defm J2_call: T_Calls<1, "">, PredRel; let isCodeGenOnly = 1, isCall 
= 1, hasSideEffects = 1, Defs = VolatileV3.Regs in -def CALLv3nr : T_Call<"">, PredRel; +def CALLv3nr : T_Call<1, "">, PredRel; + +let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = [PC, R31, R6, R7, P0] in +def CALLstk : T_Call<0, "">, PredRel; //===----------------------------------------------------------------------===// // J - diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 37c2042a2ccd..398d2d3bc716 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -1047,6 +1047,18 @@ let AddedComplexity = 40 in { def: Storexs_pat; } +class Store_rr_pat + : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)), + (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>; + +let AddedComplexity = 20 in { + def: Store_rr_pat; + def: Store_rr_pat; + def: Store_rr_pat; + def: Store_rr_pat; +} + + // memd(Rx++#s4:3)=Rtt // memd(Rx++#s4:3:circ(Mu))=Rtt // memd(Rx++I:circ(Mu))=Rtt @@ -1188,17 +1200,52 @@ def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>; def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>; def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>; +// Emit store-immediate, but only when the stored value will not be constant- +// extended. The reason for that is that there is no pass that can optimize +// constant extenders in store-immediate instructions. In some cases we can +// end up will a number of such stores, all of which store the same extended +// value (e.g. after unrolling a loop that initializes floating point array). + +// Predicates to determine if the 16-bit immediate is expressible as a sign- +// extended 8-bit immediate. Store-immediate-halfword will ignore any bits +// beyond 0..15, so we don't care what is in there. 
+ +def i16in8ImmPred: PatLeaf<(i32 imm), [{ + int64_t v = (int16_t)N->getSExtValue(); + return v == (int64_t)(int8_t)v; +}]>; + +// Predicates to determine if the 32-bit immediate is expressible as a sign- +// extended 8-bit immediate. +def i32in8ImmPred: PatLeaf<(i32 imm), [{ + int64_t v = (int32_t)N->getSExtValue(); + return v == (int64_t)(int8_t)v; +}]>; + + let AddedComplexity = 40 in { - // Not using frameindex patterns for these stores, because the offset - // is not extendable. This could cause problems during removing the frame - // indices, since the offset with respect to R29/R30 may not fit in the - // u6 field. - def: Storexm_add_pat; - def: Storexm_add_pat; - def: Storexm_add_pat; + // Even though the offset is not extendable in the store-immediate, we + // can still generate the fi# in the base address. If the final offset + // is not valid for the instruction, we will replace it with a scratch + // register. +// def: Storexm_fi_pat ; +// def: Storexm_fi_pat ; +// def: Storexm_fi_pat ; + +// defm: Storexm_fi_add_pat ; +// defm: Storexm_fi_add_pat ; +// defm: Storexm_fi_add_pat ; + + defm: Storexm_add_pat; + defm: Storexm_add_pat; + defm: Storexm_add_pat; } def: Storexm_simple_pat; @@ -1698,7 +1745,7 @@ class NVJ_ConstImm_template majOp, string ImmVal, : NVInst_V4<(outs), (ins IntRegs:$src1, brtarget:$offset), "if ("#!if(isNegCond, "!","")#mnemonic - #"($src1.new, #"#ImmVal#")) jump:" + #"($src1.new, #" # ImmVal # ")) jump:" #!if(isTak, "t","nt")#" $offset", []> { let isTaken = isTak; @@ -2318,21 +2365,15 @@ def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>; // Count trailing zeros: 64-bit. def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; -def: Pat<(i32 (trunc (cttz_zero_undef I64:$Rss))), (S2_ct0p I64:$Rss)>; // Count trailing ones: 64-bit. 
def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; -def: Pat<(i32 (trunc (cttz_zero_undef (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; // Define leading/trailing patterns that require zero-extensions to 64 bits. def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>; -def: Pat<(i64 (ctlz_zero_undef I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>; def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>; -def: Pat<(i64 (cttz_zero_undef I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>; def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>; -def: Pat<(i64 (ctlz_zero_undef (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>; def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>; -def: Pat<(i64 (cttz_zero_undef (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>; let hasSideEffects = 0, hasNewValue = 1 in @@ -2789,79 +2830,75 @@ def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt), //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// MEMOP: Word, Half, Byte +// MEMOP //===----------------------------------------------------------------------===// -def MEMOPIMM : SDNodeXFormgetSExtValue(); - return XformM5ToU5Imm(imm, SDLoc(N)); +def m5Imm8Pred : PatLeaf<(i32 imm), [{ + int8_t v = (int8_t)N->getSExtValue(); + return v > -32 && v <= -1; }]>; -def MEMOPIMM_HALF : SDNodeXFormgetSExtValue(); - return XformM5ToU5Imm(imm, SDLoc(N)); +def m5Imm16Pred : PatLeaf<(i32 imm), [{ + int16_t v = (int16_t)N->getSExtValue(); + return v > -32 && v <= -1; }]>; -def MEMOPIMM_BYTE : SDNodeXFormgetSExtValue(); - return XformM5ToU5Imm(imm, SDLoc(N)); +def Clr5Imm8Pred : PatLeaf<(i32 imm), [{ + uint32_t v = (uint8_t)~N->getZExtValue(); + return ImmIsSingleBit(v); }]>; -def SETMEMIMM : SDNodeXFormgetSExtValue(); +def Clr5Imm16Pred : PatLeaf<(i32 imm), [{ + uint32_t v = (uint16_t)~N->getZExtValue(); + return ImmIsSingleBit(v); +}]>; + +def Set5Imm8 
: SDNodeXFormgetZExtValue(); return XformMskToBitPosU5Imm(imm, SDLoc(N)); }]>; -def CLRMEMIMM : SDNodeXFormgetSExtValue()); +def Set5Imm16 : SDNodeXFormgetZExtValue(); return XformMskToBitPosU5Imm(imm, SDLoc(N)); }]>; -def SETMEMIMM_SHORT : SDNodeXFormgetSExtValue(); - return XformMskToBitPosU4Imm(imm, SDLoc(N)); +def Set5Imm32 : SDNodeXFormgetZExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); }]>; -def CLRMEMIMM_SHORT : SDNodeXFormgetSExtValue()); - return XformMskToBitPosU4Imm(imm, SDLoc(N)); +def Clr5Imm8 : SDNodeXFormgetZExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); }]>; -def SETMEMIMM_BYTE : SDNodeXFormgetSExtValue(); - return XformMskToBitPosU3Imm(imm, SDLoc(N)); +def Clr5Imm16 : SDNodeXFormgetZExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); }]>; -def CLRMEMIMM_BYTE : SDNodeXFormgetSExtValue()); - return XformMskToBitPosU3Imm(imm, SDLoc(N)); +def Clr5Imm32 : SDNodeXFormgetZExtValue(); + return XformMskToBitPosU5Imm(imm, SDLoc(N)); +}]>; + +def NegImm8 : SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32); +}]>; + +def NegImm16 : SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32); }]>; +def NegImm32 : SDNodeXFormgetTargetConstant(-N->getSExtValue(), SDLoc(N), MVT::i32); +}]>; + +def IdImm : SDNodeXForm; + //===----------------------------------------------------------------------===// // Template class for MemOp instructions with the register value. //===----------------------------------------------------------------------===// @@ -2958,197 +2995,234 @@ let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in { defm memopw_io : MemOp_base <"memw", 0b10, u6_2Ext>; } -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for ALU operations on the memory -// Here value used for the ALU operation is an immediate value. 
-// mem[bh](Rs+#0) += #U5 -// mem[bh](Rs+#u6) += #U5 -//===----------------------------------------------------------------------===// - -multiclass MemOpi_u5Pats { - let AddedComplexity = 180 in - def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend), - IntRegs:$addr), - (MI IntRegs:$addr, 0, u5ImmPred:$addend)>; - - let AddedComplexity = 190 in - def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ImmPred:$offset)), - u5ImmPred:$addend), - (add IntRegs:$base, ImmPred:$offset)), - (MI IntRegs:$base, ImmPred:$offset, u5ImmPred:$addend)>; -} - -multiclass MemOpi_u5ALUOp { - defm: MemOpi_u5Pats; - defm: MemOpi_u5Pats; -} -multiclass MemOpi_u5ExtType { - // Half Word - defm: MemOpi_u5ALUOp ; - // Byte - defm: MemOpi_u5ALUOp ; +multiclass Memopxr_simple_pat { + // Addr: i32 + def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs), + (MI I32:$Rs, 0, I32:$A)>; + // Addr: fi + def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs), + (MI AddrFI:$Rs, 0, I32:$A)>; +} + +multiclass Memopxr_add_pat { + // Addr: i32 + def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A), + (add I32:$Rs, ImmPred:$Off)), + (MI I32:$Rs, imm:$Off, I32:$A)>; + def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), I32:$A), + (orisadd I32:$Rs, ImmPred:$Off)), + (MI I32:$Rs, imm:$Off, I32:$A)>; + // Addr: fi + def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A), + (add AddrFI:$Rs, ImmPred:$Off)), + (MI AddrFI:$Rs, imm:$Off, I32:$A)>; + def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), I32:$A), + (orisadd AddrFI:$Rs, ImmPred:$Off)), + (MI AddrFI:$Rs, imm:$Off, I32:$A)>; +} + +multiclass Memopxr_pat { + defm: Memopxr_simple_pat ; + defm: Memopxr_add_pat ; +} + +let AddedComplexity = 180 in { + // add reg + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + + // sub reg + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: 
Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + + // and reg + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + + // or reg + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; + defm: Memopxr_pat; +} + + +multiclass Memopxi_simple_pat { + // Addr: i32 + def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs), + (MI I32:$Rs, 0, (ArgMod Arg:$A))>; + // Addr: fi + def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs), + (MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>; +} + +multiclass Memopxi_add_pat { + // Addr: i32 + def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$A), + (add I32:$Rs, ImmPred:$Off)), + (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>; + def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), Arg:$A), + (orisadd I32:$Rs, ImmPred:$Off)), + (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>; + // Addr: fi + def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$A), + (add AddrFI:$Rs, ImmPred:$Off)), + (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>; + def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), Arg:$A), + (orisadd AddrFI:$Rs, ImmPred:$Off)), + (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>; +} + +multiclass Memopxi_pat { + defm: Memopxi_simple_pat ; + defm: Memopxi_add_pat ; } -let Predicates = [UseMEMOP] in { - defm: MemOpi_u5ExtType; // zero extend - defm: MemOpi_u5ExtType; // sign extend - defm: MemOpi_u5ExtType; // any extend - // Word - defm: MemOpi_u5ALUOp ; -} - -//===----------------------------------------------------------------------===// -// multiclass to define 'Def Pats' for ALU operations on the memory. -// Here value used for the ALU operation is a negative value. 
-// mem[bh](Rs+#0) += #m5 -// mem[bh](Rs+#u6) += #m5 -//===----------------------------------------------------------------------===// - -multiclass MemOpi_m5Pats { - let AddedComplexity = 190 in - def: Pat<(stOp (add (ldOp IntRegs:$addr), immPred:$subend), IntRegs:$addr), - (MI IntRegs:$addr, 0, (xformFunc immPred:$subend))>; - - let AddedComplexity = 195 in - def: Pat<(stOp (add (ldOp (add IntRegs:$base, ImmPred:$offset)), - immPred:$subend), - (add IntRegs:$base, ImmPred:$offset)), - (MI IntRegs:$base, ImmPred:$offset, (xformFunc immPred:$subend))>; -} - -multiclass MemOpi_m5ExtType { - // Half Word - defm: MemOpi_m5Pats ; - // Byte - defm: MemOpi_m5Pats ; -} - -let Predicates = [UseMEMOP] in { - defm: MemOpi_m5ExtType; // zero extend - defm: MemOpi_m5ExtType; // sign extend - defm: MemOpi_m5ExtType; // any extend - - // Word - defm: MemOpi_m5Pats ; -} - -//===----------------------------------------------------------------------===// -// Multiclass to define 'def Pats' for bit operations on the memory. 
-// mem[bhw](Rs+#0) = [clrbit|setbit](#U5) -// mem[bhw](Rs+#u6) = [clrbit|setbit](#U5) -//===----------------------------------------------------------------------===// - -multiclass MemOpi_bitPats { - - // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5) - let AddedComplexity = 250 in - def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), - immPred:$bitend), - (add IntRegs:$base, extPred:$offset)), - (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>; - - // mem[bhw](Rs+#0) = [clrbit|setbit](#U5) - let AddedComplexity = 225 in - def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), immPred:$bitend), IntRegs:$addr), - (MI IntRegs:$addr, 0, (xformFunc immPred:$bitend))>; -} - -multiclass MemOpi_bitExtType { - // Byte - clrbit - defm: MemOpi_bitPats; - // Byte - setbit - defm: MemOpi_bitPats; - // Half Word - clrbit - defm: MemOpi_bitPats; - // Half Word - setbit - defm: MemOpi_bitPats; -} - -let Predicates = [UseMEMOP] in { - // mem[bh](Rs+#0) = [clrbit|setbit](#U5) - // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5) - defm: MemOpi_bitExtType; // zero extend - defm: MemOpi_bitExtType; // sign extend - defm: MemOpi_bitExtType; // any extend - - // memw(Rs+#0) = [clrbit|setbit](#U5) - // memw(Rs+#u6:2) = [clrbit|setbit](#U5) - defm: MemOpi_bitPats; - defm: MemOpi_bitPats; -} - -//===----------------------------------------------------------------------===// -// Multiclass to define 'def Pats' for ALU operations on the memory -// where addend is a register. 
-// mem[bhw](Rs+#0) [+-&|]= Rt -// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt -//===----------------------------------------------------------------------===// - -multiclass MemOpr_Pats { - let AddedComplexity = 141 in - // mem[bhw](Rs+#0) [+-&|]= Rt - def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), (i32 IntRegs:$addend)), - IntRegs:$addr), - (MI IntRegs:$addr, 0, (i32 IntRegs:$addend))>; - - // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt - let AddedComplexity = 150 in - def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), - (i32 IntRegs:$orend)), - (add IntRegs:$base, extPred:$offset)), - (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend))>; -} - -multiclass MemOPr_ALUOp { - defm: MemOpr_Pats ; - defm: MemOpr_Pats ; - defm: MemOpr_Pats ; - defm: MemOpr_Pats ; -} - -multiclass MemOPr_ExtType { - // Half Word - defm: MemOPr_ALUOp ; - // Byte - defm: MemOPr_ALUOp ; -} - -// Define 'def Pats' for MemOps with register addend. -let Predicates = [UseMEMOP] in { - // Byte, Half Word - defm: MemOPr_ExtType; // zero extend - defm: MemOPr_ExtType; // sign extend - defm: MemOPr_ExtType; // any extend - // Word - defm: MemOPr_ALUOp ; +let AddedComplexity = 200 in { + // add imm + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + + // sub imm + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + + // clrbit imm + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + + // setbit imm + defm: Memopxi_pat; + defm: 
Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; + defm: Memopxi_pat; } //===----------------------------------------------------------------------===// @@ -3281,22 +3355,57 @@ defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel; let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in { def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">; + let isExtended = 1, opExtendable = 0 in - def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">; + def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">; + + let Defs = [R14, R15, R28, R29, R30, R31, PC] in { + def RESTORE_DEALLOC_RET_JMP_V4_PIC : T_JMP<"">; + + let isExtended = 1, opExtendable = 0 in + def RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC : T_JMP<"">; + } } // Restore registers and dealloc frame before a tail call. let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in { - def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel; + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<0, "">, PredRel; + let isExtended = 1, opExtendable = 0 in - def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<"">, PredRel; + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<0, "">, PredRel; + + let Defs = [R14, R15, R28, R29, R30, R31, PC] in { + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC : T_Call<0, "">, PredRel; + + let isExtended = 1, opExtendable = 0 in + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC : T_Call<0, "">, PredRel; + } } // Save registers function call. 
let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { - def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel; + def SAVE_REGISTERS_CALL_V4 : T_Call<0, "">, PredRel; + let isExtended = 1, opExtendable = 0 in - def SAVE_REGISTERS_CALL_V4_EXT : T_Call<"">, PredRel; + def SAVE_REGISTERS_CALL_V4_EXT : T_Call<0, "">, PredRel; + + let Defs = [P0] in + def SAVE_REGISTERS_CALL_V4STK : T_Call<0, "">, PredRel; + + let Defs = [P0], isExtended = 1, opExtendable = 0 in + def SAVE_REGISTERS_CALL_V4STK_EXT : T_Call<0, "">, PredRel; + + let Defs = [R14, R15, R28] in + def SAVE_REGISTERS_CALL_V4_PIC : T_Call<0, "">, PredRel; + + let Defs = [R14, R15, R28], isExtended = 1, opExtendable = 0 in + def SAVE_REGISTERS_CALL_V4_EXT_PIC : T_Call<0, "">, PredRel; + + let Defs = [R14, R15, R28, P0] in + def SAVE_REGISTERS_CALL_V4STK_PIC : T_Call<0, "">, PredRel; + + let Defs = [R14, R15, R28, P0], isExtended = 1, opExtendable = 0 in + def SAVE_REGISTERS_CALL_V4STK_EXT_PIC : T_Call<0, "">, PredRel; } //===----------------------------------------------------------------------===// @@ -3413,9 +3522,9 @@ multiclass ST_AbsMajOp, bit isAbs> - : NVInst_V4<(outs), (ins u32Imm:$addr, IntRegs:$src), - mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new", +class T_StoreAbsGP_NV MajOp> + : NVInst_V4<(outs), (ins ImmOp:$addr, IntRegs:$src), + mnemonic #"(#$addr) = $src.new", [], "", V2LDST_tc_st_SLOT0> { bits<19> addr; bits<3> src; @@ -3426,7 +3535,6 @@ class T_StoreAbsGP_NV MajOp, bit isAbs> !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2}, !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1}, /* u16_0Imm */ addr{15-0}))); - let Uses = !if (isAbs, [], [GP]); let IClass = 0b0100; let Inst{27} = 1; @@ -3446,7 +3554,7 @@ class T_StoreAbsGP_NV MajOp, bit isAbs> let hasSideEffects = 0, isPredicated = 1, mayStore = 1, isNVStore = 1, isNewValue = 1, opNewValue = 2, opExtentBits = 6, opExtendable = 1 in class T_StoreAbs_NV_Pred MajOp, bit isNot, bit isNew> - : NVInst_V4<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, 
IntRegs:$src2), + : NVInst_V4<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, IntRegs:$src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ", ") ")#mnemonic#"(#$absaddr) = $src2.new", [], "", ST_tc_st_SLOT0>, AddrModeRel { @@ -3476,7 +3584,7 @@ class T_StoreAbs_NV_Pred MajOp, bit isNot, bit isNew> // absolute addressing. //===----------------------------------------------------------------------===// class T_StoreAbs_NV MajOp> - : T_StoreAbsGP_NV , AddrModeRel { + : T_StoreAbsGP_NV , AddrModeRel { string ImmOpStr = !cast(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3538,7 +3646,7 @@ defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>; // if ([!]Pv[.new]) mem[bhwd](##global)=Rt //===----------------------------------------------------------------------===// -let isAsmParserOnly = 1 in +let Uses = [GP], isAsmParserOnly = 1 in class T_StoreGP MajOp, bit isHalf = 0> : T_StoreAbsGP { @@ -3548,7 +3656,7 @@ class T_StoreGP MajOp, bit isHalf = 0> { // Set BaseOpcode same as absolute addressing instructions so that @@ -3558,7 +3666,7 @@ multiclass ST_GP ; // New-value store - def NAME#newgp : T_StoreAbsGP_NV ; + def NAME#newgp : T_StoreAbsGP_NV ; } } @@ -3594,6 +3702,17 @@ class Stoream_pat; +let AddedComplexity = 30 in { + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; +} + def: Storea_pat, I32, addrgp, S2_storerbgp>; def: Storea_pat, I32, addrgp, S2_storerhgp>; def: Storea_pat, I32, addrgp, S2_storerigp>; @@ -3731,6 +3850,26 @@ defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>; let accessSize = DoubleWordAccess in defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; +class LoadAbs_pats + : Pat <(VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))), + (VT (MI tglobaladdr:$absaddr))>; + +let AddedComplexity = 30 in { + def: LoadAbs_pats ; + def: LoadAbs_pats ; + def: LoadAbs_pats ; + def: LoadAbs_pats ; + def: 
LoadAbs_pats ; + def: LoadAbs_pats ; + def: LoadAbs_pats ; + def: LoadAbs_pats ; + def: LoadAbs_pats ; +} + +let AddedComplexity = 30 in +def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))), + (Zext64 (L4_loadrub_abs tglobaladdr:$absaddr))>; + //===----------------------------------------------------------------------===// // multiclass for load instructions with GP-relative addressing mode. // Rx=mem[bhwd](##global) @@ -3779,14 +3918,14 @@ class LoadGP_pats (VT (MI tglobaladdr:$global))>; let AddedComplexity = 100 in { - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; + def: LoadGP_pats ; + def: LoadGP_pats ; + def: LoadGP_pats ; + def: LoadGP_pats ; def: LoadGP_pats ; def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; + def: LoadGP_pats ; + def: LoadGP_pats ; } // When the Interprocedural Global Variable optimizer realizes that a certain @@ -3819,7 +3958,7 @@ let AddedComplexity = 30 in { // Indexed store word - global address. // memw(Rs+#u6:2)=#S8 let AddedComplexity = 100 in -def: Storex_add_pat; +defm: Storex_add_pat; // Load from a global address that has only one use in the current basic block. 
let AddedComplexity = 100 in { @@ -3996,6 +4135,10 @@ def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3), let Inst{10-0} = u11_3{13-3}; } + +def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)), + (Y2_dcfetchbo IntRegs:$Rs, u11_3ImmPred:$u11_3)>; + //===----------------------------------------------------------------------===// // Compound instructions //===----------------------------------------------------------------------===// @@ -4008,7 +4151,7 @@ class CJInst_tstbit_R0 : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), ""#px#" = tstbit($Rs, #0); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { + [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<11> r9_2; @@ -4054,7 +4197,7 @@ class CJInst_RR : InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs, $Rt); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { + [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<4> Rt; bits<11> r9_2; @@ -4108,7 +4251,7 @@ class CJInst_RU5 : InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs, #$U5); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { + [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<5> U5; bits<11> r9_2; @@ -4163,7 +4306,7 @@ class CJInst_Rn1 : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2), ""#px#" = cmp."#op#"($Rs,#-1); if (" #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2", - [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon { + [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon { bits<4> Rs; bits<11> r9_2; @@ -4212,7 +4355,7 @@ defm gt : T_pnp_CJInst_Rn1<"gt">; let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1, isExtentSigned = 1, 
opNewValue = 0, isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 2 in -def J4_jumpseti: CJInst < +def J4_jumpseti: CJInst_JMPSET < (outs IntRegs:$Rd), (ins u6Imm:$U6, brtarget:$r9_2), "$Rd = #$U6 ; jump $r9_2"> { @@ -4232,7 +4375,7 @@ def J4_jumpseti: CJInst < let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1, isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 2 in -def J4_jumpsetr: CJInst < +def J4_jumpsetr: CJInst_JMPSET < (outs IntRegs:$Rd), (ins IntRegs:$Rs, brtarget:$r9_2), "$Rd = $Rs ; jump $r9_2"> { diff --git a/lib/Target/Hexagon/HexagonInstrInfoV60.td b/lib/Target/Hexagon/HexagonInstrInfoV60.td index 897ada081534..c3f09b69ce85 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV60.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV60.td @@ -10,6 +10,21 @@ // This file describes the Hexagon V60 instructions in TableGen format. // //===----------------------------------------------------------------------===// +def alignedload : PatFrag<(ops node:$addr), (load $addr), [{ + return isAlignedMemNode(dyn_cast(N)); +}]>; + +def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{ + return !isAlignedMemNode(dyn_cast(N)); +}]>; + +def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ + return isAlignedMemNode(dyn_cast(N)); +}]>; + +def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ + return !isAlignedMemNode(dyn_cast(N)); +}]>; // Vector store @@ -102,7 +117,7 @@ let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD, hasNewValue = 1 in { //===----------------------------------------------------------------------===// // Vector stores with base + immediate offset - unconditional //===----------------------------------------------------------------------===// -let addrMode = BaseImmOffset, accessSize = Vector64Access in +let addrMode = BaseImmOffset, accessSize = Vector64Access, isPredicable = 1 in class T_vstore_ai : 
V6_STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), @@ -133,16 +148,16 @@ let isNVStorable = 1, isNonTemporal = 1 in { } let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { - def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vs32Ub_ai">, + def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vS32Ub_ai">, V6_vS32Ub_ai_enc; - def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vs32Ub_ai">, + def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vS32Ub_ai">, V6_vS32Ub_ai_128B_enc; } //===----------------------------------------------------------------------===// // Vector stores with base + immediate offset - unconditional new //===----------------------------------------------------------------------===// let addrMode = BaseImmOffset, isNewValue = 1, opNewValue = 2, isNVStore = 1, - Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in + isPredicable = 1, Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in class T_vstore_new_ai : V6_STInst <(outs ), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), "vmem($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3.new">, NewValueRel { @@ -384,13 +399,15 @@ let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in { //===----------------------------------------------------------------------===// // Post increment vector stores with immediate offset. 
//===----------------------------------------------------------------------===// -let addrMode = PostInc in +let addrMode = PostInc, isPredicable = 1 in class T_vstore_pi : V6_STInst <(outs IntRegs:$_dst_), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), mnemonic#"($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3", [], - "$src1 = $_dst_">, NewValueRel; + "$src1 = $_dst_">, NewValueRel { + let BaseOpcode = baseOp; +} let accessSize = Vector64Access in class T_vstore_pi_64B @@ -398,7 +415,7 @@ class T_vstore_pi_64B let isCodeGenOnly = 1, accessSize = Vector128Access in class T_vstore_pi_128B - : T_vstore_pi ; + : T_vstore_pi ; let isNVStorable = 1 in { def V6_vS32b_pi : T_vstore_pi_64B <"vmem", "vS32b_pi">, V6_vS32b_pi_enc; @@ -426,7 +443,7 @@ let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { //===----------------------------------------------------------------------===// let addrMode = PostInc, isNVStore = 1 in let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1, - opNewValue = 3, isNVStore = 1 in + isPredicable = 1, opNewValue = 3, isNVStore = 1 in class T_vstore_new_pi : V6_STInst <(outs IntRegs:$_dst_), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), @@ -644,6 +661,7 @@ let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in { //===----------------------------------------------------------------------===// // Post increment vector stores with register offset //===----------------------------------------------------------------------===// +let isPredicable = 1 in class T_vstore_ppu : V6_STInst <(outs IntRegs:$_dst_), (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3), @@ -665,7 +683,7 @@ def V6_vS32Ub_ppu : T_vstore_ppu <"vmemu">, V6_vS32Ub_ppu_enc; // Post increment .new vector stores with register offset //===----------------------------------------------------------------------===// let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1, - opNewValue = 3, isNVStore = 1 in + isPredicable = 1, opNewValue = 3, isNVStore = 1 in 
class T_vstore_new_ppu : V6_STInst <(outs IntRegs:$_dst_), (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3), @@ -785,30 +803,46 @@ defm : STrivv_pats ; multiclass vS32b_ai_pats { // Aligned stores - def : Pat<(store (VTSgl VectorRegs:$src1), IntRegs:$addr), + def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>, Requires<[UseHVXSgl]>; + def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), + (V6_vS32Ub_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; // 128B Aligned stores - def : Pat<(store (VTDbl VectorRegs128B:$src1), IntRegs:$addr), + def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>, Requires<[UseHVXDbl]>; + def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), + (V6_vS32Ub_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; // Fold Add R+IFF into vector store. - let AddedComplexity = 10 in - def : Pat<(store (VTSgl VectorRegs:$src1), - (add IntRegs:$src2, s4_6ImmPred:$offset)), - (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset, - (VTSgl VectorRegs:$src1))>, - Requires<[UseHVXSgl]>; + let AddedComplexity = 10 in { + def : Pat<(alignedstore (VTSgl VectorRegs:$src1), + (add IntRegs:$src2, s4_6ImmPred:$offset)), + (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset, + (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; + def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), + (add IntRegs:$src2, s4_6ImmPred:$offset)), + (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset, + (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; - // Fold Add R+IFF into vector store 128B. 
- let AddedComplexity = 10 in - def : Pat<(store (VTDbl VectorRegs128B:$src1), - (add IntRegs:$src2, s4_7ImmPred:$offset)), - (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, - (VTDbl VectorRegs128B:$src1))>, - Requires<[UseHVXDbl]>; + // Fold Add R+IFF into vector store 128B. + def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), + (add IntRegs:$src2, s4_7ImmPred:$offset)), + (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, + (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), + (add IntRegs:$src2, s4_7ImmPred:$offset)), + (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, + (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; + } } defm : vS32b_ai_pats ; @@ -843,25 +877,37 @@ defm : LDrivv_pats ; multiclass vL32b_ai_pats { // Aligned loads - def : Pat < (VTSgl (load IntRegs:$addr)), + def : Pat < (VTSgl (alignedload IntRegs:$addr)), (V6_vL32b_ai IntRegs:$addr, #0) >, Requires<[UseHVXSgl]>; + def : Pat < (VTSgl (unalignedload IntRegs:$addr)), + (V6_vL32Ub_ai IntRegs:$addr, #0) >, + Requires<[UseHVXSgl]>; // 128B Load - def : Pat < (VTDbl (load IntRegs:$addr)), + def : Pat < (VTDbl (alignedload IntRegs:$addr)), (V6_vL32b_ai_128B IntRegs:$addr, #0) >, Requires<[UseHVXDbl]>; + def : Pat < (VTDbl (unalignedload IntRegs:$addr)), + (V6_vL32Ub_ai_128B IntRegs:$addr, #0) >, + Requires<[UseHVXDbl]>; // Fold Add R+IFF into vector load. 
- let AddedComplexity = 10 in - def : Pat<(VTDbl (load (add IntRegs:$src2, s4_7ImmPred:$offset))), - (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, - Requires<[UseHVXDbl]>; - - let AddedComplexity = 10 in - def : Pat<(VTSgl (load (add IntRegs:$src2, s4_6ImmPred:$offset))), - (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>, - Requires<[UseHVXSgl]>; + let AddedComplexity = 10 in { + def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))), + (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, + Requires<[UseHVXDbl]>; + def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))), + (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, + Requires<[UseHVXDbl]>; + + def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))), + (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>, + Requires<[UseHVXSgl]>; + def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))), + (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>, + Requires<[UseHVXSgl]>; + } } defm : vL32b_ai_pats ; diff --git a/lib/Target/Hexagon/HexagonInstrInfoVector.td b/lib/Target/Hexagon/HexagonInstrInfoVector.td index 96dd5315b87f..0277d5e3c28c 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoVector.td +++ b/lib/Target/Hexagon/HexagonInstrInfoVector.td @@ -35,61 +35,12 @@ multiclass bitconvert_64 { (a DoubleRegs:$src)>; } -multiclass bitconvert_vec { - def : Pat <(b (bitconvert (a VectorRegs:$src))), - (b VectorRegs:$src)>; - def : Pat <(a (bitconvert (b VectorRegs:$src))), - (a VectorRegs:$src)>; -} - -multiclass bitconvert_dblvec { - def : Pat <(b (bitconvert (a VecDblRegs:$src))), - (b VecDblRegs:$src)>; - def : Pat <(a (bitconvert (b VecDblRegs:$src))), - (a VecDblRegs:$src)>; -} - -multiclass bitconvert_predvec { - def : Pat <(b (bitconvert (a VecPredRegs:$src))), - (b VectorRegs:$src)>; - def : Pat <(a (bitconvert (b VectorRegs:$src))), - (a VecPredRegs:$src)>; -} - -multiclass bitconvert_dblvec128B { - def : Pat <(b (bitconvert 
(a VecDblRegs128B:$src))), - (b VecDblRegs128B:$src)>; - def : Pat <(a (bitconvert (b VecDblRegs128B:$src))), - (a VecDblRegs128B:$src)>; -} - -// Bit convert vector types. -defm : bitconvert_32; +// Bit convert vector types to integers. +defm : bitconvert_32; defm : bitconvert_32; -defm : bitconvert_32; - -defm : bitconvert_64; +defm : bitconvert_64; defm : bitconvert_64; defm : bitconvert_64; -defm : bitconvert_64; -defm : bitconvert_64; -defm : bitconvert_64; - -defm : bitconvert_vec; -defm : bitconvert_vec; -defm : bitconvert_vec; - -defm : bitconvert_dblvec; -defm : bitconvert_dblvec; -defm : bitconvert_dblvec; - -defm : bitconvert_dblvec128B; -defm : bitconvert_dblvec128B; -defm : bitconvert_dblvec128B; - -defm : bitconvert_dblvec128B; -defm : bitconvert_dblvec128B; -defm : bitconvert_dblvec128B; // Vector shift support. Vector shifting in Hexagon is rather different // from internal representation of LLVM. diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td index b207aaf392f4..a319dd4f9789 100644 --- a/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -23,27 +23,29 @@ class T_R_pat class T_P_pat : Pat <(IntID I64:$Rs), - (MI DoubleRegs:$Rs)>; + (MI I64:$Rs)>; class T_II_pat : Pat<(IntID Imm1:$Is, Imm2:$It), (MI Imm1:$Is, Imm2:$It)>; -class T_RI_pat > +class T_RI_pat > : Pat<(IntID I32:$Rs, ImmPred:$It), (MI I32:$Rs, ImmPred:$It)>; -class T_IR_pat > +class T_IR_pat > : Pat<(IntID ImmPred:$Is, I32:$Rt), (MI ImmPred:$Is, I32:$Rt)>; class T_PI_pat : Pat<(IntID I64:$Rs, imm:$It), - (MI DoubleRegs:$Rs, imm:$It)>; + (MI I64:$Rs, imm:$It)>; class T_RP_pat : Pat<(IntID I32:$Rs, I64:$Rt), - (MI I32:$Rs, DoubleRegs:$Rt)>; + (MI I32:$Rs, I64:$Rt)>; class T_RR_pat : Pat <(IntID I32:$Rs, I32:$Rt), @@ -51,19 +53,31 @@ class T_RR_pat class T_PP_pat : Pat <(IntID I64:$Rs, I64:$Rt), - (MI DoubleRegs:$Rs, DoubleRegs:$Rt)>; + (MI I64:$Rs, I64:$Rt)>; + +class T_QQ_pat + : Pat <(IntID I32:$Rs, 
I32:$Rt), + (MI (C2_tfrrp I32:$Rs), (C2_tfrrp I32:$Rt))>; class T_QII_pat - : Pat <(IntID (i32 PredRegs:$Ps), Imm1:$Is, Imm2:$It), - (MI PredRegs:$Ps, Imm1:$Is, Imm2:$It)>; + : Pat <(IntID I32:$Rp, Imm1:$Is, Imm2:$It), + (MI (C2_tfrrp I32:$Rp), Imm1:$Is, Imm2:$It)>; + +class T_QRR_pat + : Pat <(IntID I32:$Rp, I32:$Rs, I32:$Rt), + (MI (C2_tfrrp I32:$Rp), I32:$Rs, I32:$Rt)>; class T_QRI_pat - : Pat <(IntID (i32 PredRegs:$Ps), I32:$Rs, ImmPred:$Is), - (MI PredRegs:$Ps, I32:$Rs, ImmPred:$Is)>; + : Pat <(IntID I32:$Rp, I32:$Rs, ImmPred:$Is), + (MI (C2_tfrrp I32:$Rp), I32:$Rs, ImmPred:$Is)>; class T_QIR_pat - : Pat <(IntID (i32 PredRegs:$Ps), ImmPred:$Is, I32:$Rs), - (MI PredRegs:$Ps, ImmPred:$Is, I32:$Rs)>; + : Pat <(IntID I32:$Rp, ImmPred:$Is, I32:$Rs), + (MI (C2_tfrrp I32:$Rp), ImmPred:$Is, I32:$Rs)>; + +class T_QPP_pat + : Pat <(IntID I32:$Rp, I64:$Rs, I64:$Rt), + (MI (C2_tfrrp I32:$Rp), I64:$Rs, I64:$Rt)>; class T_RRI_pat : Pat <(IntID I32:$Rs, I32:$Rt, imm:$Iu), @@ -91,31 +105,31 @@ class T_RRR_pat class T_PPI_pat : Pat <(IntID I64:$Rs, I64:$Rt, imm:$Iu), - (MI DoubleRegs:$Rs, DoubleRegs:$Rt, imm:$Iu)>; + (MI I64:$Rs, I64:$Rt, imm:$Iu)>; class T_PII_pat : Pat <(IntID I64:$Rs, imm:$It, imm:$Iu), - (MI DoubleRegs:$Rs, imm:$It, imm:$Iu)>; + (MI I64:$Rs, imm:$It, imm:$Iu)>; class T_PPP_pat : Pat <(IntID I64:$Rs, I64:$Rt, I64:$Ru), - (MI DoubleRegs:$Rs, DoubleRegs:$Rt, DoubleRegs:$Ru)>; + (MI I64:$Rs, I64:$Rt, I64:$Ru)>; class T_PPR_pat : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Ru), - (MI DoubleRegs:$Rs, DoubleRegs:$Rt, I32:$Ru)>; + (MI I64:$Rs, I64:$Rt, I32:$Ru)>; class T_PRR_pat : Pat <(IntID I64:$Rs, I32:$Rt, I32:$Ru), - (MI DoubleRegs:$Rs, I32:$Rt, I32:$Ru)>; + (MI I64:$Rs, I32:$Rt, I32:$Ru)>; class T_PPQ_pat - : Pat <(IntID I64:$Rs, I64:$Rt, (i32 PredRegs:$Ru)), - (MI DoubleRegs:$Rs, DoubleRegs:$Rt, PredRegs:$Ru)>; + : Pat <(IntID I64:$Rs, I64:$Rt, I32:$Rp), + (MI I64:$Rs, I64:$Rt, (C2_tfrrp I32:$Rp))>; class T_PR_pat : Pat <(IntID I64:$Rs, I32:$Rt), - (MI 
DoubleRegs:$Rs, I32:$Rt)>; + (MI I64:$Rs, I32:$Rt)>; class T_D_pat : Pat<(IntID (F64:$Rs)), @@ -131,7 +145,7 @@ class T_F_pat (MI F32:$Rs)>; class T_FI_pat > + PatLeaf ImmPred = PatLeaf<(i32 imm)>> : Pat<(IntID F32:$Rs, ImmPred:$It), (MI F32:$Rs, ImmPred:$It)>; @@ -148,8 +162,62 @@ class T_FFF_pat (MI F32:$Rs, F32:$Rt, F32:$Ru)>; class T_FFFQ_pat - : Pat <(IntID F32:$Rs, F32:$Rt, F32:$Ru, (i32 PredRegs:$Rx)), - (MI F32:$Rs, F32:$Rt, F32:$Ru, PredRegs:$Rx)>; + : Pat <(IntID F32:$Rs, F32:$Rt, F32:$Ru, I32:$Rp), + (MI F32:$Rs, F32:$Rt, F32:$Ru, (C2_tfrrp I32:$Rp))>; + +class T_Q_RI_pat > + : Pat<(IntID I32:$Rs, ImmPred:$It), + (C2_tfrpr (MI I32:$Rs, ImmPred:$It))>; + +class T_Q_RR_pat + : Pat <(IntID I32:$Rs, I32:$Rt), + (C2_tfrpr (MI I32:$Rs, I32:$Rt))>; + +class T_Q_RP_pat + : Pat <(IntID I32:$Rs, I64:$Rt), + (C2_tfrpr (MI I32:$Rs, I64:$Rt))>; + +class T_Q_PR_pat + : Pat <(IntID I64:$Rs, I32:$Rt), + (C2_tfrpr (MI I64:$Rs, I32:$Rt))>; + +class T_Q_PI_pat + : Pat<(IntID I64:$Rs, imm:$It), + (C2_tfrpr (MI I64:$Rs, imm:$It))>; + +class T_Q_PP_pat + : Pat <(IntID I64:$Rs, I64:$Rt), + (C2_tfrpr (MI I64:$Rs, I64:$Rt))>; + +class T_Q_Q_pat + : Pat <(IntID I32:$Rp), + (C2_tfrpr (MI (C2_tfrrp I32:$Rp)))>; + +class T_Q_QQ_pat + : Pat <(IntID I32:$Rp, I32:$Rq), + (C2_tfrpr (MI (C2_tfrrp I32:$Rp), (C2_tfrrp I32:$Rq)))>; + +class T_Q_FF_pat + : Pat<(IntID F32:$Rs, F32:$Rt), + (C2_tfrpr (MI F32:$Rs, F32:$Rt))>; + +class T_Q_DD_pat + : Pat<(IntID F64:$Rs, F64:$Rt), + (C2_tfrpr (MI F64:$Rs, F64:$Rt))>; + +class T_Q_FI_pat + : Pat<(IntID F32:$Rs, imm:$It), + (C2_tfrpr (MI F32:$Rs, imm:$It))>; + +class T_Q_DI_pat + : Pat<(IntID F64:$Rs, imm:$It), + (C2_tfrpr (MI F64:$Rs, imm:$It))>; + +class T_Q_QQQ_pat + : Pat <(IntID I32:$Rp, I32:$Rq, I32:$Rs), + (C2_tfrpr (MI (C2_tfrrp I32:$Rp), (C2_tfrrp I32:$Rq), + (C2_tfrrp I32:$Rs)))>; //===----------------------------------------------------------------------===// // MPYS / Multipy signed/unsigned halfwords @@ -645,9 +713,9 @@ def : T_PPR_pat 
; def : T_PPR_pat ; def : T_PPR_pat ; -/******************************************************************** -* ALU32/ALU * -*********************************************************************/ +//******************************************************************* +// ALU32/ALU +//******************************************************************* def : T_RR_pat; def : T_RI_pat; def : T_RR_pat; @@ -660,31 +728,46 @@ def : T_RR_pat; def : T_RR_pat; // Assembler mapped from Rd32=not(Rs32) to Rd32=sub(#-1,Rs32) -def : Pat <(int_hexagon_A2_not (I32:$Rs)), - (A2_subri -1, IntRegs:$Rs)>; +def : Pat <(int_hexagon_A2_not I32:$Rs), + (A2_subri -1, I32:$Rs)>; // Assembler mapped from Rd32=neg(Rs32) to Rd32=sub(#0,Rs32) -def : Pat <(int_hexagon_A2_neg IntRegs:$Rs), - (A2_subri 0, IntRegs:$Rs)>; +def : Pat <(int_hexagon_A2_neg I32:$Rs), + (A2_subri 0, I32:$Rs)>; // Transfer immediate -def : Pat <(int_hexagon_A2_tfril (I32:$Rs), u16_0ImmPred:$Is), - (A2_tfril IntRegs:$Rs, u16_0ImmPred:$Is)>; -def : Pat <(int_hexagon_A2_tfrih (I32:$Rs), u16_0ImmPred:$Is), - (A2_tfrih IntRegs:$Rs, u16_0ImmPred:$Is)>; +def : Pat <(int_hexagon_A2_tfril I32:$Rs, u16_0ImmPred:$Is), + (A2_tfril I32:$Rs, u16_0ImmPred:$Is)>; +def : Pat <(int_hexagon_A2_tfrih I32:$Rs, u16_0ImmPred:$Is), + (A2_tfrih I32:$Rs, u16_0ImmPred:$Is)>; // Transfer Register/immediate. def : T_R_pat ; def : T_I_pat ; -def : T_I_pat ; + +def ImmExt64: SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i64); +}]>; + +// A2_tfrpi has an operand of type i64. This is necessary, since it is +// generated from "(set I64:$Rd, imm)". That pattern would not appear +// in the DAG, if the immediate was not a 64-bit value. +// The builtin for A2_tfrpi, on the other hand, takes a 32-bit value, +// which makes it impossible to simply replace it with the instruction. +// To connect the builtin with the instruction, the builtin's operand +// needs to be extended to the right type. 
+ +def : Pat<(int_hexagon_A2_tfrpi imm:$Is), + (A2_tfrpi (ImmExt64 $Is))>; // Assembler mapped from Rdd32=Rss32 to Rdd32=combine(Rss.H32,Rss.L32) -def : Pat<(int_hexagon_A2_tfrp DoubleRegs:$src), - (A2_combinew (HiReg DoubleRegs:$src), (LoReg DoubleRegs:$src))>; +def : Pat<(int_hexagon_A2_tfrp I64:$src), + (A2_combinew (HiReg I64:$src), (LoReg I64:$src))>; -/******************************************************************** -* ALU32/PERM * -*********************************************************************/ +//******************************************************************* +// ALU32/PERM +//******************************************************************* // Combine def: T_RR_pat; def: T_RR_pat; @@ -693,10 +776,8 @@ def: T_RR_pat; def: T_II_pat; -def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))), - (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>; - // Mux +def : T_QRR_pat; def : T_QRI_pat; def : T_QIR_pat; def : T_QII_pat; @@ -712,41 +793,36 @@ def : T_R_pat; def : T_R_pat; def : T_R_pat; -/******************************************************************** -* ALU32/PRED * -*********************************************************************/ +//******************************************************************* +// ALU32/PRED +//******************************************************************* // Compare -def : T_RR_pat; -def : T_RR_pat; -def : T_RR_pat; - -def : T_RI_pat; -def : T_RI_pat; -def : T_RI_pat; - -def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)), - (i32 (C2_cmpgti (I32:$src1), - (DEC_CONST_SIGNED s32ImmPred:$src2)))>; - -def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)), - (i32 (C2_cmpgtui (I32:$src1), - (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>; - -// The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0. 
-def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)), - (i32 (C2_cmpeq (I32:$src1), (I32:$src1)))>; - -def : Pat <(i32 (int_hexagon_C2_cmplt (I32:$src1), - (I32:$src2))), - (i32 (C2_cmpgt (I32:$src2), (I32:$src1)))>; - -def : Pat <(i32 (int_hexagon_C2_cmpltu (I32:$src1), - (I32:$src2))), - (i32 (C2_cmpgtu (I32:$src2), (I32:$src1)))>; - -/******************************************************************** -* ALU32/VH * -*********************************************************************/ +def : T_Q_RR_pat; +def : T_Q_RR_pat; +def : T_Q_RR_pat; + +def : T_Q_RI_pat; +def : T_Q_RI_pat; +def : T_Q_RI_pat; + +def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32ImmPred:$src2), + (C2_tfrpr (C2_cmpgti I32:$src1, + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; + +def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32ImmPred:$src2), + (C2_tfrpr (C2_cmpgtui I32:$src1, + (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>; + +def : Pat <(int_hexagon_C2_cmpgeui I32:$src, 0), + (C2_tfrpr (C2_cmpeq I32:$src, I32:$src))>; +def : Pat <(int_hexagon_C2_cmplt I32:$src1, I32:$src2), + (C2_tfrpr (C2_cmpgt I32:$src2, I32:$src1))>; +def : Pat <(int_hexagon_C2_cmpltu I32:$src1, I32:$src2), + (C2_tfrpr (C2_cmpgtu I32:$src2, I32:$src1))>; + +//******************************************************************* +// ALU32/VH +//******************************************************************* // Vector add, subtract, average halfwords def: T_RR_pat; def: T_RR_pat; @@ -760,28 +836,28 @@ def: T_RR_pat; def: T_RR_pat; def: T_RR_pat; -/******************************************************************** -* ALU64/ALU * -*********************************************************************/ -def: T_RR_pat; -def: T_RR_pat; -def: T_PP_pat; -def: T_PP_pat; +//******************************************************************* +// ALU64/ALU +//******************************************************************* +def: T_RR_pat; +def: T_RR_pat; +def: T_PP_pat; +def: T_PP_pat; -def: T_PP_pat; -def: T_PP_pat; -def: T_PP_pat; 
+def: T_PP_pat; +def: T_PP_pat; +def: T_PP_pat; -def: T_PP_pat; -def: T_PP_pat; -def: T_PP_pat; +def: T_Q_PP_pat; +def: T_Q_PP_pat; +def: T_Q_PP_pat; -def: T_PP_pat; -def: T_RR_pat; +def: T_PP_pat; +def: T_RR_pat; -/******************************************************************** -* ALU64/VB * -*********************************************************************/ +//******************************************************************* +// ALU64/VB +//******************************************************************* // ALU64 - Vector add def : T_PP_pat ; def : T_PP_pat ; @@ -838,23 +914,22 @@ def : T_PP_pat ; def : T_PP_pat ; // ALU64 - Vector compare bytes -def : T_PP_pat ; -def : T_PP_pat ; -def : T_PP_pat ; +def : T_Q_PP_pat ; +def : T_Q_PP_pat ; +def : T_Q_PP_pat ; // ALU64 - Vector compare halfwords -def : T_PP_pat ; -def : T_PP_pat ; -def : T_PP_pat ; +def : T_Q_PP_pat ; +def : T_Q_PP_pat ; +def : T_Q_PP_pat ; // ALU64 - Vector compare words -def : T_PP_pat ; -def : T_PP_pat ; -def : T_PP_pat ; +def : T_Q_PP_pat ; +def : T_Q_PP_pat ; +def : T_Q_PP_pat ; // ALU64 / VB / Vector mux. 
-def : Pat<(int_hexagon_C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), - (C2_vmux PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt)>; +def : T_QPP_pat ; // MPY - Multiply and use full result // Rdd = mpy[u](Rs, Rt) @@ -903,35 +978,24 @@ def : T_PRR_pat ; def : T_PRR_pat ; def : T_PRR_pat ; -/******************************************************************** -* CR * -*********************************************************************/ -class qi_CRInst_qi_pat : - Pat<(i32 (IntID IntRegs:$Rs)), - (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs))))>; - -class qi_CRInst_qiqi_pat : - Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt)), - (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs), (C2_tfrrp IntRegs:$Rt))))>; - -def: qi_CRInst_qi_pat; -def: qi_CRInst_qi_pat; -def: qi_CRInst_qi_pat; +//******************************************************************* +// CR +//******************************************************************* +def: T_Q_Q_pat; +def: T_Q_Q_pat; +def: T_Q_Q_pat; +def: T_Q_Q_pat; -def: qi_CRInst_qiqi_pat; -def: qi_CRInst_qiqi_pat; -def: qi_CRInst_qiqi_pat; -def: qi_CRInst_qiqi_pat; -def: qi_CRInst_qiqi_pat; - -// Assembler mapped from Pd4=Ps4 to Pd4=or(Ps4,Ps4) -def : Pat<(int_hexagon_C2_pxfer_map PredRegs:$src), - (C2_pxfer_map PredRegs:$src)>; +def: T_Q_QQ_pat; +def: T_Q_QQ_pat; +def: T_Q_QQ_pat; +def: T_Q_QQ_pat; +def: T_Q_QQ_pat; // Multiply 32x32 and use lower result def : T_RRI_pat ; def : T_RRI_pat ; -def : T_RRR_pat ; +def : T_RRR_pat ; // Subtract and accumulate def : T_RRR_pat ; @@ -945,54 +1009,45 @@ def : T_RRI_pat ; // XOR and XOR with destination def : T_RRR_pat ; -class MType_R32_pat : - Pat <(IntID IntRegs:$src1, IntRegs:$src2), - (OutputInst IntRegs:$src1, IntRegs:$src2)>; - // Vector dual multiply with round and pack - -def : Pat <(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2), - (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>; - -def : Pat <(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2), - (M2_vdmpyrs_s1 
DoubleRegs:$src1, DoubleRegs:$src2)>; +def : T_PP_pat ; +def : T_PP_pat ; // Vector multiply halfwords with round and pack - -def : MType_R32_pat ; -def : MType_R32_pat ; +def : T_RR_pat ; +def : T_RR_pat ; // Multiply and use lower result -def : MType_R32_pat ; -def : T_RI_pat; +def : T_RR_pat ; +def : T_RI_pat ; // Assembler mapped from Rd32=mpyui(Rs32,Rt32) to Rd32=mpyi(Rs32,Rt32) -def : MType_R32_pat ; +def : T_RR_pat ; // Multiply and use upper result -def : MType_R32_pat ; -def : MType_R32_pat ; -def : MType_R32_pat ; -def : MType_R32_pat ; -def : MType_R32_pat ; +def : T_RR_pat ; +def : T_RR_pat ; +def : T_RR_pat ; +def : T_RR_pat ; +def : T_RR_pat ; // Complex multiply with round and pack // Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat -def : MType_R32_pat ; -def : MType_R32_pat ; -def : MType_R32_pat ; -def : MType_R32_pat ; - -/******************************************************************** -* STYPE/ALU * -*********************************************************************/ +def : T_RR_pat ; +def : T_RR_pat ; +def : T_RR_pat ; +def : T_RR_pat ; + +//******************************************************************* +// STYPE/ALU +//******************************************************************* def : T_P_pat ; def : T_P_pat ; def : T_P_pat ; -/******************************************************************** -* STYPE/BIT * -*********************************************************************/ +//******************************************************************* +// STYPE/BIT +//******************************************************************* // Count leading/trailing def: T_R_pat; @@ -1023,6 +1078,11 @@ def : T_PP_pat ; // Linear feedback-shift Iteration. 
def : T_PP_pat ; +// Vector align +// Need custom lowering +def : T_PPQ_pat ; +def : T_PPI_pat ; + // Vector splice def : T_PPQ_pat ; def : T_PPI_pat ; @@ -1037,26 +1097,22 @@ def : T_RP_pat ; def : T_PP_pat ; // Insert bitfield -def : Pat <(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2, - DoubleRegs:$src3), - (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>; +def : Pat <(int_hexagon_S2_insert_rp I32:$src1, I32:$src2, I64:$src3), + (S2_insert_rp I32:$src1, I32:$src2, I64:$src3)>; -def : Pat<(i64 (int_hexagon_S2_insertp_rp (I64:$src1), - (I64:$src2), (I64:$src3))), - (i64 (S2_insertp_rp (I64:$src1), (I64:$src2), - (I64:$src3)))>; +def : Pat<(i64 (int_hexagon_S2_insertp_rp I64:$src1, I64:$src2, I64:$src3)), + (i64 (S2_insertp_rp I64:$src1, I64:$src2, I64:$src3))>; -def : Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, +def : Pat<(int_hexagon_S2_insert I32:$src1, I32:$src2, u5ImmPred:$src3, u5ImmPred:$src4), - (S2_insert IntRegs:$src1, IntRegs:$src2, + (S2_insert I32:$src1, I32:$src2, u5ImmPred:$src3, u5ImmPred:$src4)>; -def : Pat<(i64 (int_hexagon_S2_insertp (I64:$src1), - (I64:$src2), u6ImmPred:$src3, u6ImmPred:$src4)), - (i64 (S2_insertp (I64:$src1), (I64:$src2), +def : Pat<(i64 (int_hexagon_S2_insertp I64:$src1, I64:$src2, + u6ImmPred:$src3, u6ImmPred:$src4)), + (i64 (S2_insertp I64:$src1, I64:$src2, u6ImmPred:$src3, u6ImmPred:$src4))>; - // Innterleave/deinterleave def : T_P_pat ; def : T_P_pat ; @@ -1071,21 +1127,21 @@ def: T_RR_pat; def: T_RR_pat; // Test Bit -def: T_RI_pat; -def: T_RR_pat; +def: T_Q_RI_pat; +def: T_Q_RR_pat; -/******************************************************************** -* STYPE/COMPLEX * -*********************************************************************/ +//******************************************************************* +// STYPE/COMPLEX +//******************************************************************* // Vector Complex conjugate def : T_P_pat ; // Vector Complex rotate def : T_PR_pat ; 
-/******************************************************************** -* STYPE/PERM * -*********************************************************************/ +//******************************************************************* +// STYPE/PERM +//******************************************************************* // Vector saturate without pack def : T_P_pat ; @@ -1093,28 +1149,26 @@ def : T_P_pat ; def : T_P_pat ; def : T_P_pat ; -/******************************************************************** -* STYPE/PRED * -*********************************************************************/ +//******************************************************************* +// STYPE/PRED +//******************************************************************* // Predicate transfer -def: Pat<(i32 (int_hexagon_C2_tfrpr (I32:$Rs))), - (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>; -def: Pat<(i32 (int_hexagon_C2_tfrrp (I32:$Rs))), - (i32 (C2_tfrpr (C2_tfrrp (I32:$Rs))))>; +def: Pat<(i32 (int_hexagon_C2_tfrpr I32:$Rs)), + (i32 (C2_tfrpr (C2_tfrrp I32:$Rs)))>; +def: Pat<(i32 (int_hexagon_C2_tfrrp I32:$Rs)), + (i32 (C2_tfrpr (C2_tfrrp I32:$Rs)))>; // Mask generate from predicate -def: Pat<(i64 (int_hexagon_C2_mask (I32:$Rs))), - (i64 (C2_mask (C2_tfrrp (I32:$Rs))))>; +def: Pat<(i64 (int_hexagon_C2_mask I32:$Rs)), + (i64 (C2_mask (C2_tfrrp I32:$Rs)))>; // Viterbi pack even and odd predicate bits -def: Pat<(i32 (int_hexagon_C2_vitpack (I32:$Rs), (I32:$Rt))), - (i32 (C2_vitpack (C2_tfrrp (I32:$Rs)), - (C2_tfrrp (I32:$Rt))))>; +def: T_QQ_pat; -/******************************************************************** -* STYPE/SHIFT * -*********************************************************************/ +//******************************************************************* +// STYPE/SHIFT +//******************************************************************* def : T_PI_pat ; def : T_PI_pat ; @@ -1185,8 +1239,8 @@ def : T_RI_pat ; 
//===----------------------------------------------------------------------===// class S2op_tableidx_pat - : Pat <(IntID IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3, u5ImmPred:$src4), - (OutputInst IntRegs:$src1, IntRegs:$src2, u4ImmPred:$src3, + : Pat <(IntID I32:$src1, I32:$src2, u4ImmPred:$src3, u5ImmPred:$src4), + (OutputInst I32:$src1, I32:$src2, u4ImmPred:$src3, (XformImm u5ImmPred:$src4))>; @@ -1195,9 +1249,9 @@ class S2op_tableidx_pat ; def : S2op_tableidx_pat ; -/******************************************************************** -* STYPE/VH * -*********************************************************************/ +//******************************************************************* +// STYPE/VH +//******************************************************************* // Vector absolute value halfwords with and without saturation // Rdd64=vabsh(Rss64)[:sat] @@ -1229,9 +1283,9 @@ def : T_PR_pat ; def : T_PR_pat ; def : T_PR_pat ; -/******************************************************************** -* STYPE/VW * -*********************************************************************/ +//******************************************************************* +// STYPE/VW +//******************************************************************* // Vector absolute value words with and without saturation def : T_P_pat ; @@ -1251,43 +1305,42 @@ def : T_PR_pat ; def : T_PR_pat ; // Vector shift words with truncate and pack - def : T_PR_pat ; +// Load/store locked. 
def : T_R_pat; def : T_R_pat; -def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))), - (i32 (C2_tfrpr (S2_storew_locked (I32:$Rs), (I32:$Rt))))>; -def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))), - (i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>; +def : Pat<(int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), + (C2_tfrpr (S2_storew_locked I32:$Rs, I32:$Rt))>; +def : Pat<(int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), + (C2_tfrpr (S4_stored_locked I32:$Rs, I64:$Rt))>; -/******************************************************************** -* ST -*********************************************************************/ +//******************************************************************* +// ST +//******************************************************************* class T_stb_pat : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru), - (MI I32:$Rs, Val:$Rt, I32:$Ru)>; + (MI I32:$Rs, I32:$Ru, Val:$Rt)>; -def : T_stb_pat ; -def : T_stb_pat ; -def : T_stb_pat ; -def : T_stb_pat ; -def : T_stb_pat ; +def : T_stb_pat ; +def : T_stb_pat ; +def : T_stb_pat ; +def : T_stb_pat ; +def : T_stb_pat ; class T_stc_pat : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s), - (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>; + (MI I32:$Rs, Imm:$s, I32:$Ru, Val:$Rt)>; -def: T_stc_pat; -def: T_stc_pat; -def: T_stc_pat; -def: T_stc_pat; -def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; include "HexagonIntrinsicsV3.td" include "HexagonIntrinsicsV4.td" include "HexagonIntrinsicsV5.td" include "HexagonIntrinsicsV60.td" - diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td index c80a188d82e7..578973db1933 100644 --- a/lib/Target/Hexagon/HexagonIntrinsicsV4.td +++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td @@ -60,71 +60,60 @@ def : T_PPR_pat ; def : T_PPR_pat ; // Multiply and use upper result -def : MType_R32_pat ; -def : MType_R32_pat ; -def : MType_R32_pat ; -def : MType_R32_pat ; 
-def : MType_R32_pat ; +def : T_RR_pat ; +def : T_RR_pat ; +def : T_RR_pat ; +def : T_RR_pat ; +def : T_RR_pat ; -// Vector reduce add unsigned halfwords -def : Pat <(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2), - (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>; - -def : T_P_pat ; - -def: T_P_pat ; -def: T_P_pat ; -def: T_RR_pat; -def: T_RR_pat; -def: T_RI_pat; - - -class vcmpImm_pat : - Pat <(IntID (i64 DoubleRegs:$src1), immPred:$src2), - (MI (i64 DoubleRegs:$src1), immPred:$src2)>; - -def : vcmpImm_pat ; -def : vcmpImm_pat ; -def : vcmpImm_pat ; - -def : vcmpImm_pat ; -def : vcmpImm_pat ; -def : vcmpImm_pat ; - -def : vcmpImm_pat ; -def : vcmpImm_pat ; -def : vcmpImm_pat ; - -def : T_PP_pat; +def : T_PP_pat ; +def : T_PP_pat ; -def : T_RR_pat; -def : T_RR_pat; -def : T_RR_pat; -def : T_RR_pat; -def : T_RR_pat; -def : T_RR_pat; - -def : T_RI_pat; -def : T_RI_pat; -def : T_RI_pat; - -def : T_RI_pat; -def : T_RI_pat; -def : T_RI_pat; - -def : T_RP_pat ; - -def : T_PR_pat; - -def : Pat <(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, - IntRegs:$src3), - (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def : T_IRR_pat ; -def : T_IRI_pat ; +// Vector reduce add unsigned halfwords +def : T_PP_pat ; + +def: T_P_pat; +def: T_P_pat; +def: T_P_pat; + +def: T_Q_RR_pat; +def: T_Q_RR_pat; +def: T_Q_RI_pat; + +def : T_Q_PI_pat; +def : T_Q_PI_pat; +def : T_Q_PI_pat; +def : T_Q_PI_pat; +def : T_Q_PI_pat; +def : T_Q_PI_pat; +def : T_Q_PI_pat; +def : T_Q_PI_pat; +def : T_Q_PI_pat; +def : T_Q_PP_pat; + +def : T_Q_RR_pat; +def : T_Q_RR_pat; +def : T_Q_RR_pat; +def : T_Q_RR_pat; +def : T_Q_RR_pat; +def : T_Q_RR_pat; + +def : T_Q_RI_pat; +def : T_Q_RI_pat; +def : T_Q_RI_pat; + +def : T_Q_RI_pat; +def : T_Q_RI_pat; +def : T_Q_RI_pat; + +def : T_Q_RP_pat; +def : T_Q_PR_pat; + +def : T_RRR_pat ; +def : T_IRR_pat ; +def : T_IRI_pat ; def : T_RIR_pat ; -def : T_RRI_pat ; -// Multiply 32x32 and use upper result +def : T_RRI_pat ; def : T_RRR_pat ; def 
: T_RRR_pat ; @@ -210,41 +199,46 @@ def : T_IRI_pat ; // Split bitfield def : T_RI_pat ; -def : T_RR_pat ; +def : T_RR_pat ; -def: T_RR_pat; +def: T_RR_pat; -def: T_RI_pat; -def: T_RR_pat; +def: T_Q_RI_pat; +def: T_Q_RR_pat; -def: T_RI_pat; -def: T_PI_pat; -def: T_P_pat ; +def: T_RI_pat; +def: T_PI_pat; +def: T_P_pat ; -/******************************************************************** -* ALU32/ALU * -*********************************************************************/ +//******************************************************************* +// ALU32/ALU +//******************************************************************* // ALU32 / ALU / Logical Operations. def: T_RR_pat; def: T_RR_pat; -/******************************************************************** -* ALU32/PERM * -*********************************************************************/ +//******************************************************************* +// ALU32/PERM +//******************************************************************* // Combine Words Into Doublewords. def: T_RI_pat; def: T_IR_pat; -/******************************************************************** -* ALU32/PRED * -*********************************************************************/ +//******************************************************************* +// ALU32/PRED +//******************************************************************* // Compare -def : T_RI_pat; -def : T_RI_pat; -def : T_RI_pat; +def : T_Q_RI_pat; +def : T_Q_RI_pat; +def : T_Q_RI_pat; + +// Compare To General Register. 
+def: T_Q_RR_pat; +def: T_Q_RR_pat; +def: T_Q_RR_pat; def: T_RR_pat; def: T_RR_pat; @@ -252,30 +246,23 @@ def: T_RR_pat; def: T_RI_pat; def: T_RI_pat; -/******************************************************************** -* CR * -*********************************************************************/ +//******************************************************************* +// CR +//******************************************************************* // CR / Logical Operations On Predicates. - -class qi_CRInst_qiqiqi_pat : - Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt, IntRegs:$Ru)), - (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs), - (C2_tfrrp IntRegs:$Rt), - (C2_tfrrp IntRegs:$Ru))))>; - -def: qi_CRInst_qiqiqi_pat; -def: qi_CRInst_qiqiqi_pat; -def: qi_CRInst_qiqiqi_pat; -def: qi_CRInst_qiqiqi_pat; -def: qi_CRInst_qiqiqi_pat; -def: qi_CRInst_qiqiqi_pat; -def: qi_CRInst_qiqiqi_pat; -def: qi_CRInst_qiqiqi_pat; - -/******************************************************************** -* XTYPE/ALU * -*********************************************************************/ +def: T_Q_QQQ_pat; +def: T_Q_QQQ_pat; +def: T_Q_QQQ_pat; +def: T_Q_QQQ_pat; +def: T_Q_QQQ_pat; +def: T_Q_QQQ_pat; +def: T_Q_QQQ_pat; +def: T_Q_QQQ_pat; + +//******************************************************************* +// XTYPE/ALU +//******************************************************************* // Add And Accumulate. 
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/lib/Target/Hexagon/HexagonIntrinsicsV5.td index 60e6b1eb4479..f27a63e20e61 100644 --- a/lib/Target/Hexagon/HexagonIntrinsicsV5.td +++ b/lib/Target/Hexagon/HexagonIntrinsicsV5.td @@ -43,8 +43,8 @@ def : T_FF_pat; def : T_FF_pat; def : T_F_pat ; -def: qi_CRInst_qiqi_pat; -def: qi_CRInst_qiqi_pat; +def : T_Q_QQ_pat; +def : T_Q_QQ_pat; def : T_P_pat ; def : T_PI_pat ; @@ -65,15 +65,15 @@ def : T_FFF_pat ; def : T_FFFQ_pat ; // Compare floating-point value -def : T_FF_pat ; -def : T_FF_pat ; -def : T_FF_pat ; -def : T_FF_pat ; +def : T_Q_FF_pat ; +def : T_Q_FF_pat ; +def : T_Q_FF_pat ; +def : T_Q_FF_pat ; -def : T_DD_pat ; -def : T_DD_pat ; -def : T_DD_pat ; -def : T_DD_pat ; +def : T_Q_DD_pat ; +def : T_Q_DD_pat ; +def : T_Q_DD_pat ; +def : T_Q_DD_pat ; // Create floating-point value def : T_I_pat ; @@ -81,8 +81,8 @@ def : T_I_pat ; def : T_I_pat ; def : T_I_pat ; -def : T_DI_pat ; -def : T_FI_pat ; +def : T_Q_DI_pat ; +def : T_Q_FI_pat ; def : T_F_pat ; def : T_D_pat ; def : T_R_pat ; diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/lib/Target/Hexagon/HexagonIntrinsicsV60.td index 24a3e4d36de9..82bc91bb3021 100644 --- a/lib/Target/Hexagon/HexagonIntrinsicsV60.td +++ b/lib/Target/Hexagon/HexagonIntrinsicsV60.td @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// + let isCodeGenOnly = 1 in { def HEXAGON_V6_vd0_pseudo : CVI_VA_Resource<(outs VectorRegs:$dst), (ins ), @@ -22,6 +23,7 @@ def HEXAGON_V6_vd0_pseudo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), "$dst=#0", [(set VectorRegs128B:$dst, (int_hexagon_V6_vd0_128B ))]>; } + let isPseudo = 1 in def HEXAGON_V6_vassignp : CVI_VA_Resource<(outs VecDblRegs:$dst), (ins VecDblRegs:$src1), @@ -800,7 +802,7 @@ defm : T_VQR_pat ; defm : T_QVR_pat ; defm : T_QR_pat ; defm : T_R_pat ; -defm : T_R_pat ; +defm : T_R_pat ; defm : T_VR_pat ; defm : T_VVR_pat ; diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp 
b/lib/Target/Hexagon/HexagonMCInstLower.cpp index 624c0f6cf49d..a5dc002642c8 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -32,7 +32,7 @@ namespace llvm { } static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, - HexagonAsmPrinter &Printer) { + HexagonAsmPrinter &Printer, bool MustExtend) { MCContext &MC = Printer.OutContext; const MCExpr *ME; @@ -58,6 +58,21 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, case HexagonII::MO_GPREL: RelocationType = MCSymbolRefExpr::VK_Hexagon_GPREL; break; + case HexagonII::MO_GDGOT: + RelocationType = MCSymbolRefExpr::VK_Hexagon_GD_GOT; + break; + case HexagonII::MO_GDPLT: + RelocationType = MCSymbolRefExpr::VK_Hexagon_GD_PLT; + break; + case HexagonII::MO_IE: + RelocationType = MCSymbolRefExpr::VK_Hexagon_IE; + break; + case HexagonII::MO_IEGOT: + RelocationType = MCSymbolRefExpr::VK_Hexagon_IE_GOT; + break; + case HexagonII::MO_TPREL: + RelocationType = MCSymbolRefExpr::VK_TPREL; + break; } ME = MCSymbolRefExpr::create(Symbol, RelocationType, MC); @@ -66,6 +81,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, ME = MCBinaryExpr::createAdd(ME, MCConstantExpr::create(MO.getOffset(), MC), MC); + ME = HexagonMCExpr::create(ME, MC); + HexagonMCInstrInfo::setMustExtend(*ME, MustExtend); return MCOperand::createExpr(ME); } @@ -84,13 +101,11 @@ void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, MCI->setOpcode(MI->getOpcode()); assert(MCI->getOpcode() == static_cast(MI->getOpcode()) && "MCI opcode should have been set on construction"); - bool MustExtend = false; for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { const MachineOperand &MO = MI->getOperand(i); MCOperand MCO; - if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) - MustExtend = true; + bool MustExtend = MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended; switch (MO.getType()) { 
default: @@ -105,42 +120,51 @@ void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, APFloat Val = MO.getFPImm()->getValueAPF(); // FP immediates are used only when setting GPRs, so they may be dealt // with like regular immediates from this point on. - MCO = MCOperand::createExpr( - MCConstantExpr::create(*Val.bitcastToAPInt().getRawData(), - AP.OutContext)); + auto Expr = HexagonMCExpr::create( + MCConstantExpr::create(*Val.bitcastToAPInt().getRawData(), + AP.OutContext), + AP.OutContext); + HexagonMCInstrInfo::setMustExtend(*Expr, MustExtend); + MCO = MCOperand::createExpr(Expr); break; } - case MachineOperand::MO_Immediate: - MCO = MCOperand::createExpr( - MCConstantExpr::create(MO.getImm(), AP.OutContext)); + case MachineOperand::MO_Immediate: { + auto Expr = HexagonMCExpr::create( + MCConstantExpr::create(MO.getImm(), AP.OutContext), AP.OutContext); + HexagonMCInstrInfo::setMustExtend(*Expr, MustExtend); + MCO = MCOperand::createExpr(Expr); break; - case MachineOperand::MO_MachineBasicBlock: - MCO = MCOperand::createExpr - (MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), - AP.OutContext)); + } + case MachineOperand::MO_MachineBasicBlock: { + MCExpr const *Expr = MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), + AP.OutContext); + Expr = HexagonMCExpr::create(Expr, AP.OutContext); + HexagonMCInstrInfo::setMustExtend(*Expr, MustExtend); + MCO = MCOperand::createExpr(Expr); break; + } case MachineOperand::MO_GlobalAddress: - MCO = GetSymbolRef(MO, AP.getSymbol(MO.getGlobal()), AP); + MCO = GetSymbolRef(MO, AP.getSymbol(MO.getGlobal()), AP, MustExtend); break; case MachineOperand::MO_ExternalSymbol: MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), - AP); + AP, MustExtend); break; case MachineOperand::MO_JumpTableIndex: - MCO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP); + MCO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, MustExtend); break; case MachineOperand::MO_ConstantPoolIndex: - MCO = 
GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP); + MCO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, MustExtend); break; case MachineOperand::MO_BlockAddress: - MCO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP); + MCO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP, + MustExtend); break; } MCI->addOperand(MCO); } AP.HexagonProcessInstruction(*MCI, *MI); - HexagonMCInstrInfo::extendIfNeeded(AP.OutContext, MCII, MCB, *MCI, - MustExtend); + HexagonMCInstrInfo::extendIfNeeded(AP.OutContext, MCII, MCB, *MCI); MCB.addOperand(MCOperand::createInst(MCI)); } diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h index 76723586c66e..26c5b63fec6c 100644 --- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h +++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -27,7 +27,8 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo { // returning the value of the returned struct in a register. This field // holds the virtual register into which the sret argument is passed. 
unsigned SRetReturnReg; - unsigned StackAlignBaseReg; + unsigned StackAlignBaseVReg; // Aligned-stack base register (virtual) + unsigned StackAlignBasePhysReg; // (physical) std::vector AllocaAdjustInsts; int VarArgsFrameIndex; bool HasClobberLR; @@ -36,13 +37,12 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo { virtual void anchor(); public: - HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseReg(0), - HasClobberLR(0), HasEHReturn(false) {} + HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseVReg(0), + StackAlignBasePhysReg(0), HasClobberLR(0), HasEHReturn(false) {} HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), - StackAlignBaseReg(0), - HasClobberLR(0), - HasEHReturn(false) {} + StackAlignBaseVReg(0), StackAlignBasePhysReg(0), HasClobberLR(0), + HasEHReturn(false) {} unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } @@ -77,8 +77,11 @@ public: bool hasEHReturn() const { return HasEHReturn; }; void setHasEHReturn(bool H = true) { HasEHReturn = H; }; - void setStackAlignBaseVReg(unsigned R) { StackAlignBaseReg = R; } - unsigned getStackAlignBaseVReg() const { return StackAlignBaseReg; } + void setStackAlignBaseVReg(unsigned R) { StackAlignBaseVReg = R; } + unsigned getStackAlignBaseVReg() const { return StackAlignBaseVReg; } + + void setStackAlignBasePhysReg(unsigned R) { StackAlignBasePhysReg = R; } + unsigned getStackAlignBasePhysReg() const { return StackAlignBasePhysReg; } }; } // End llvm namespace diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 7a52d6874c33..6dcac0dc7ee2 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -13,28 +13,126 @@ //===----------------------------------------------------------------------===// #include "HexagonMachineScheduler.h" +#include "HexagonSubtarget.h" #include 
"llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/IR/Function.h" +#include +#include + +static cl::opt IgnoreBBRegPressure("ignore-bb-reg-pressure", + cl::Hidden, cl::ZeroOrMore, cl::init(false)); + +static cl::opt SchedPredsCloser("sched-preds-closer", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +static cl::opt SchedDebugVerboseLevel("misched-verbose-level", + cl::Hidden, cl::ZeroOrMore, cl::init(1)); + +static cl::opt TopUseShorterTie("top-use-shorter-tie", + cl::Hidden, cl::ZeroOrMore, cl::init(false)); + +static cl::opt BotUseShorterTie("bot-use-shorter-tie", + cl::Hidden, cl::ZeroOrMore, cl::init(false)); + +static cl::opt DisableTCTie("disable-tc-tie", + cl::Hidden, cl::ZeroOrMore, cl::init(false)); + +static cl::opt SchedRetvalOptimization("sched-retval-optimization", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +// Check if the scheduler should penalize instructions that are available to +// early due to a zero-latency dependence. +static cl::opt CheckEarlyAvail("check-early-avail", cl::Hidden, + cl::ZeroOrMore, cl::init(true)); + using namespace llvm; #define DEBUG_TYPE "misched" -/// Platform-specific modifications to DAG. -void VLIWMachineScheduler::postprocessDAG() { +class HexagonCallMutation : public ScheduleDAGMutation { +public: + void apply(ScheduleDAGInstrs *DAG) override; +private: + bool shouldTFRICallBind(const HexagonInstrInfo &HII, + const SUnit &Inst1, const SUnit &Inst2) const; +}; + +// Check if a call and subsequent A2_tfrpi instructions should maintain +// scheduling affinity. We are looking for the TFRI to be consumed in +// the next instruction. This should help reduce the instances of +// double register pairs being allocated and scheduled before a call +// when not used until after the call. This situation is exacerbated +// by the fact that we allocate the pair from the callee saves list, +// leading to excess spills and restores. 
+bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII, + const SUnit &Inst1, const SUnit &Inst2) const { + if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi) + return false; + + // TypeXTYPE are 64 bit operations. + if (HII.getType(Inst2.getInstr()) == HexagonII::TypeXTYPE) + return true; + return false; +} + +void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) { SUnit* LastSequentialCall = nullptr; + unsigned VRegHoldingRet = 0; + unsigned RetRegister; + SUnit* LastUseOfRet = nullptr; + auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo(); + auto &HII = *DAG->MF.getSubtarget().getInstrInfo(); + // Currently we only catch the situation when compare gets scheduled // before preceding call. - for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) { // Remember the call. - if (SUnits[su].getInstr()->isCall()) - LastSequentialCall = &(SUnits[su]); + if (DAG->SUnits[su].getInstr()->isCall()) + LastSequentialCall = &DAG->SUnits[su]; // Look for a compare that defines a predicate. - else if (SUnits[su].getInstr()->isCompare() && LastSequentialCall) - SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); + else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall) + DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); + // Look for call and tfri* instructions. + else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 && + shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1])) + DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier)); + // Prevent redundant register copies between two calls, which are caused by + // both the return value and the argument for the next call being in %R0. + // Example: + // 1: + // 2: %VregX = COPY %R0 + // 3: + // 4: %R0 = ... + // 5: + // The scheduler would often swap 3 and 4, so an additional register is + // needed. 
This code inserts a Barrier dependence between 3 & 4 to prevent + // this. The same applies for %D0 and %V0/%W0, which are also handled. + else if (SchedRetvalOptimization) { + const MachineInstr *MI = DAG->SUnits[su].getInstr(); + if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) || + MI->readsRegister(Hexagon::V0, &TRI))) { + // %vregX = COPY %R0 + VRegHoldingRet = MI->getOperand(0).getReg(); + RetRegister = MI->getOperand(1).getReg(); + LastUseOfRet = nullptr; + } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet)) + // + LastUseOfRet = &DAG->SUnits[su]; + else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI)) + // %R0 = ... + DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier)); + } } } + +/// Save the last formed packet +void VLIWResourceModel::savePacket() { + OldPacket = Packet; +} + /// Check if scheduling of this SU is possible /// in the current packet. /// It is _not_ precise (statefull), it is more like @@ -48,7 +146,7 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU) { // in the current cycle. switch (SU->getInstr()->getOpcode()) { default: - if (!ResourcesModel->canReserveResources(SU->getInstr())) + if (!ResourcesModel->canReserveResources(*SU->getInstr())) return false; case TargetOpcode::EXTRACT_SUBREG: case TargetOpcode::INSERT_SUBREG: @@ -60,11 +158,19 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU) { break; } + MachineFunction &MF = *SU->getInstr()->getParent()->getParent(); + auto &QII = *MF.getSubtarget().getInstrInfo(); + // Now see if there are no other dependencies to instructions already // in the packet. for (unsigned i = 0, e = Packet.size(); i != e; ++i) { if (Packet[i]->Succs.size() == 0) continue; + + // Enable .cur formation. 
+ if (QII.mayBeCurLoad(Packet[i]->getInstr())) + continue; + for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), E = Packet[i]->Succs.end(); I != E; ++I) { // Since we do not add pseudos to packets, might as well @@ -85,6 +191,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) { // Artificially reset state. if (!SU) { ResourcesModel->clearResources(); + savePacket(); Packet.clear(); TotalPackets++; return false; @@ -93,6 +200,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) { // start a new one. if (!isResourceAvailable(SU)) { ResourcesModel->clearResources(); + savePacket(); Packet.clear(); TotalPackets++; startNewCycle = true; @@ -100,7 +208,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) { switch (SU->getInstr()->getOpcode()) { default: - ResourcesModel->reserveResources(SU->getInstr()); + ResourcesModel->reserveResources(*SU->getInstr()); break; case TargetOpcode::EXTRACT_SUBREG: case TargetOpcode::INSERT_SUBREG: @@ -129,6 +237,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) { // we start fresh. if (Packet.size() >= SchedModel->getIssueWidth()) { ResourcesModel->clearResources(); + savePacket(); Packet.clear(); TotalPackets++; startNewCycle = true; @@ -150,19 +259,12 @@ void VLIWMachineScheduler::schedule() { buildDAGWithRegPressure(); - // Postprocess the DAG to add platform-specific artificial dependencies. - postprocessDAG(); - SmallVector TopRoots, BotRoots; findRootsAndBiasEdges(TopRoots, BotRoots); // Initialize the strategy before modifying the DAG. SchedImpl->initialize(this); - // To view Height/Depth correctly, they should be accessed at least once. - // - // FIXME: SUnit::dumpAll always recompute depth and height now. The max - // depth/height could be computed directly from the roots and leaves. 
DEBUG(unsigned maxH = 0; for (unsigned su = 0, e = SUnits.size(); su != e; ++su) if (SUnits[su].getHeight() > maxH) @@ -197,6 +299,13 @@ void VLIWMachineScheduler::schedule() { assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); placeDebugValues(); + + DEBUG({ + unsigned BBNum = begin()->getParent()->getNumber(); + dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { @@ -223,16 +332,18 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) && "-misched-topdown incompatible with -misched-bottomup"); + + DAG->addMutation(make_unique()); + DAG->addMutation(make_unique()); } void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { if (SU->isScheduled) return; - for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned MinLatency = I->getLatency(); + for (const SDep &PI : SU->Preds) { + unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; + unsigned MinLatency = PI.getLatency(); #ifndef NDEBUG Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); #endif @@ -321,8 +432,8 @@ void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { } CheckPending = true; - DEBUG(dbgs() << "*** " << Available.getName() << " cycle " - << CurrCycle << '\n'); + DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " + << CurrCycle << '\n'); } /// Move the boundary of scheduled code by one SUnit. 
@@ -414,16 +525,38 @@ SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { #ifndef NDEBUG void ConvergingVLIWScheduler::traceCandidate(const char *Label, - const ReadyQueue &Q, - SUnit *SU, PressureChange P) { + const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) { dbgs() << Label << " " << Q.getName() << " "; if (P.isValid()) dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" << P.getUnitInc() << " "; else dbgs() << " "; + dbgs() << "cost(" << Cost << ")\t"; SU->dump(DAG); } + +// Very detailed queue dump, to be used with higher verbosity levels. +void ConvergingVLIWScheduler::readyQueueVerboseDump( + const RegPressureTracker &RPTracker, SchedCandidate &Candidate, + ReadyQueue &Q) { + RegPressureTracker &TempTracker = const_cast(RPTracker); + + dbgs() << ">>> " << Q.getName() << "\n"; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + std::stringstream dbgstr; + dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")"; + dbgs() << dbgstr.str(); + SchedulingCost(Q, *I, Candidate, RPDelta, true); + dbgs() << "\t"; + (*I)->getInstr()->dump(); + } + dbgs() << "\n"; +} #endif /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor @@ -466,6 +599,7 @@ static SUnit *getSingleUnscheduledSucc(SUnit *SU) { // heuristic components for cost computation. static const unsigned PriorityOne = 200; static const unsigned PriorityTwo = 50; +static const unsigned PriorityThree = 75; static const unsigned ScaleTwo = 10; static const unsigned FactorOne = 2; @@ -482,25 +616,50 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, if (!SU || SU->isScheduled) return ResCount; + MachineInstr *Instr = SU->getInstr(); + + DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|")); // Forced priority is high. 
- if (SU->isScheduleHigh) + if (SU->isScheduleHigh) { ResCount += PriorityOne; + DEBUG(dbgs() << "H|"); + } // Critical path first. if (Q.getID() == TopQID) { ResCount += (SU->getHeight() * ScaleTwo); + DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "h" << std::setw(3) << SU->getHeight() << "|"; + dbgs() << dbgstr.str(); + }); + // If resources are available for it, multiply the // chance of scheduling. - if (Top.ResourceModel->isResourceAvailable(SU)) + if (Top.ResourceModel->isResourceAvailable(SU)) { ResCount <<= FactorOne; + ResCount += PriorityThree; + DEBUG(if (verbose) dbgs() << "A|"); + } else + DEBUG(if (verbose) dbgs() << " |"); } else { ResCount += (SU->getDepth() * ScaleTwo); + DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "d" << std::setw(3) << SU->getDepth() << "|"; + dbgs() << dbgstr.str(); + }); + // If resources are available for it, multiply the // chance of scheduling. - if (Bot.ResourceModel->isResourceAvailable(SU)) + if (Bot.ResourceModel->isResourceAvailable(SU)) { ResCount <<= FactorOne; + ResCount += PriorityThree; + DEBUG(if (verbose) dbgs() << "A|"); + } else + DEBUG(if (verbose) dbgs() << " |"); } unsigned NumNodesBlocking = 0; @@ -509,24 +668,121 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // Look at all of the successors of this node. // Count the number of nodes that // this node is the sole unscheduled node for. - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) - if (getSingleUnscheduledPred(I->getSUnit()) == SU) + for (const SDep &SI : SU->Succs) + if (getSingleUnscheduledPred(SI.getSUnit()) == SU) ++NumNodesBlocking; } else { // How many unscheduled predecessors block this node? 
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) - if (getSingleUnscheduledSucc(I->getSUnit()) == SU) + for (const SDep &PI : SU->Preds) + if (getSingleUnscheduledSucc(PI.getSUnit()) == SU) ++NumNodesBlocking; } ResCount += (NumNodesBlocking * ScaleTwo); + DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|"; + dbgs() << dbgstr.str(); + }); + // Factor in reg pressure as a heuristic. - ResCount -= (Delta.Excess.getUnitInc()*PriorityTwo); - ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityTwo); + if (!IgnoreBBRegPressure) { + // Decrease priority by the amount that register pressure exceeds the limit. + ResCount -= (Delta.Excess.getUnitInc()*PriorityOne); + // Decrease priority if register pressure exceeds the limit. + ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne); + // Decrease priority slightly if register pressure would increase over the + // current maximum. + ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo); + DEBUG(if (verbose) { + dbgs() << "RP " << Delta.Excess.getUnitInc() << "/" + << Delta.CriticalMax.getUnitInc() <<"/" + << Delta.CurrentMax.getUnitInc() << ")|"; + }); + } + + // Give a little extra priority to a .cur instruction if there is a resource + // available for it. + auto &QST = DAG->MF.getSubtarget(); + auto &QII = *QST.getInstrInfo(); + if (SU->isInstr() && QII.mayBeCurLoad(SU->getInstr())) { + if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) { + ResCount += PriorityTwo; + DEBUG(if (verbose) dbgs() << "C|"); + } else if (Q.getID() == BotQID && + Bot.ResourceModel->isResourceAvailable(SU)) { + ResCount += PriorityTwo; + DEBUG(if (verbose) dbgs() << "C|"); + } + } + + // Give preference to a zero latency instruction if the dependent + // instruction is in the current packet. 
+ if (Q.getID() == TopQID) { + for (const SDep &PI : SU->Preds) { + if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() && + PI.getLatency() == 0 && + Top.ResourceModel->isInPacket(PI.getSUnit())) { + ResCount += PriorityThree; + DEBUG(if (verbose) dbgs() << "Z|"); + } + } + } else { + for (const SDep &SI : SU->Succs) { + if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() && + SI.getLatency() == 0 && + Bot.ResourceModel->isInPacket(SI.getSUnit())) { + ResCount += PriorityThree; + DEBUG(if (verbose) dbgs() << "Z|"); + } + } + } + + // Give less preference to an instruction that will cause a stall with + // an instruction in the previous packet. + if (QII.isV60VectorInstruction(Instr)) { + // Check for stalls in the previous packet. + if (Q.getID() == TopQID) { + for (auto J : Top.ResourceModel->OldPacket) + if (QII.producesStall(J->getInstr(), Instr)) + ResCount -= PriorityOne; + } else { + for (auto J : Bot.ResourceModel->OldPacket) + if (QII.producesStall(Instr, J->getInstr())) + ResCount -= PriorityOne; + } + } - DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")"); + // If the instruction has a non-zero latency dependence with an instruction in + // the current packet, then it should not be scheduled yet. The case occurs + // when the dependent instruction is scheduled in a new packet, so the + // scheduler updates the current cycle and pending instructions become + // available. 
+ if (CheckEarlyAvail) { + if (Q.getID() == TopQID) { + for (const auto &PI : SU->Preds) { + if (PI.getLatency() > 0 && + Top.ResourceModel->isInPacket(PI.getSUnit())) { + ResCount -= PriorityOne; + DEBUG(if (verbose) dbgs() << "D|"); + } + } + } else { + for (const auto &SI : SU->Succs) { + if (SI.getLatency() > 0 && + Bot.ResourceModel->isInPacket(SI.getSUnit())) { + ResCount -= PriorityOne; + DEBUG(if (verbose) dbgs() << "D|"); + } + } + } + } + + DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "Total " << std::setw(4) << ResCount << ")"; + dbgs() << dbgstr.str(); + }); return ResCount; } @@ -539,7 +795,9 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, SchedCandidate &Candidate) { - DEBUG(Q.dump()); + DEBUG(if (SchedDebugVerboseLevel > 1) + readyQueueVerboseDump(RPTracker, Candidate, Q); + else Q.dump();); // getMaxPressureDelta temporarily modifies the tracker. RegPressureTracker &TempTracker = const_cast(RPTracker); @@ -556,6 +814,7 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, // Initialize the candidate if needed. if (!Candidate.SU) { + DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost)); Candidate.SU = *I; Candidate.RPDelta = RPDelta; Candidate.SCost = CurrentCost; @@ -565,7 +824,7 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, // Best cost. if (CurrentCost > Candidate.SCost) { - DEBUG(traceCandidate("CCAND", Q, *I)); + DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost)); Candidate.SU = *I; Candidate.RPDelta = RPDelta; Candidate.SCost = CurrentCost; @@ -573,6 +832,69 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, continue; } + // Tie breaker using Timing Class. 
+ if (!DisableTCTie) { + auto &QST = DAG->MF.getSubtarget(); + auto &QII = *QST.getInstrInfo(); + + const MachineInstr *MI = (*I)->getInstr(); + const MachineInstr *CandI = Candidate.SU->getInstr(); + const InstrItineraryData *InstrItins = QST.getInstrItineraryData(); + + unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, MI); + unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, CandI); + DEBUG(dbgs() << "TC Tie Breaker Cand: " + << CandLatency << " Instr:" << InstrLatency << "\n" + << *MI << *CandI << "\n"); + if (Q.getID() == TopQID && CurrentCost == Candidate.SCost) { + if (InstrLatency < CandLatency && TopUseShorterTie) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + DEBUG(dbgs() << "Used top shorter tie breaker\n"); + continue; + } else if (InstrLatency > CandLatency && !TopUseShorterTie) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + DEBUG(dbgs() << "Used top longer tie breaker\n"); + continue; + } + } else if (Q.getID() == BotQID && CurrentCost == Candidate.SCost) { + if (InstrLatency < CandLatency && BotUseShorterTie) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + DEBUG(dbgs() << "Used Bot shorter tie breaker\n"); + continue; + } else if (InstrLatency > CandLatency && !BotUseShorterTie) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + DEBUG(dbgs() << "Used Bot longer tie breaker\n"); + continue; + } + } + } + + if (CurrentCost == Candidate.SCost) { + if ((Q.getID() == TopQID && + (*I)->Succs.size() > Candidate.SU->Succs.size()) || + (Q.getID() == BotQID && + (*I)->Preds.size() < Candidate.SU->Preds.size())) { + DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + 
FoundCandidate = BestCost; + continue; + } + } + // Fall through to original instruction order. // Only consider node order if Candidate was chosen from this Q. if (FoundCandidate == NoCand) @@ -586,10 +908,12 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { // Schedule as far as possible in the direction of no choice. This is most // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { + DEBUG(dbgs() << "Picked only Bottom\n"); IsTopNode = false; return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { + DEBUG(dbgs() << "Picked only Top\n"); IsTopNode = true; return SU; } @@ -607,6 +931,7 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { // increase pressure for one of the excess PSets, then schedule in that // direction first to provide more freedom in the other direction. if (BotResult == SingleExcess || BotResult == SingleCritical) { + DEBUG(dbgs() << "Prefered Bottom Node\n"); IsTopNode = false; return BotCand.SU; } @@ -617,24 +942,29 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { assert(TopResult != NoCand && "failed to find the first candidate"); if (TopResult == SingleExcess || TopResult == SingleCritical) { + DEBUG(dbgs() << "Prefered Top Node\n"); IsTopNode = true; return TopCand.SU; } // If either Q has a single candidate that minimizes pressure above the // original region's pressure pick it. if (BotResult == SingleMax) { + DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n"); IsTopNode = false; return BotCand.SU; } if (TopResult == SingleMax) { + DEBUG(dbgs() << "Prefered Top Node SingleMax\n"); IsTopNode = true; return TopCand.SU; } if (TopCand.SCost > BotCand.SCost) { + DEBUG(dbgs() << "Prefered Top Node Cost\n"); IsTopNode = true; return TopCand.SU; } // Otherwise prefer the bottom candidate in node order. 
+ DEBUG(dbgs() << "Prefered Bottom in Node order\n"); IsTopNode = false; return BotCand.SU; } diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h index 60343442e327..51c84a4cee31 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -24,7 +24,6 @@ #include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -53,6 +52,10 @@ class VLIWResourceModel { /// Total packets created. unsigned TotalPackets; +public: + /// Save the last formed packet. + std::vector OldPacket; + public: VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM) : SchedModel(SM), TotalPackets(0) { @@ -64,6 +67,8 @@ public: Packet.resize(SchedModel->getIssueWidth()); Packet.clear(); + OldPacket.resize(SchedModel->getIssueWidth()); + OldPacket.clear(); ResourcesModel->clearResources(); } @@ -86,7 +91,12 @@ public: bool isResourceAvailable(SUnit *SU); bool reserveResources(SUnit *SU); + void savePacket(); unsigned getTotalPackets() const { return TotalPackets; } + + bool isInPacket(SUnit *SU) const { + return std::find(Packet.begin(), Packet.end(), SU) != Packet.end(); + } }; /// Extend the standard ScheduleDAGMI to provide more context and override the @@ -100,8 +110,6 @@ public: /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's /// time to do some work. void schedule() override; - /// Perform platform-specific DAG postprocessing. 
- void postprocessDAG(); }; /// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics @@ -167,6 +175,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) { DAG = dag; SchedModel = smodel; + IssueCount = 0; } bool isTop() const { @@ -234,7 +243,10 @@ protected: SchedCandidate &Candidate); #ifndef NDEBUG void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, - PressureChange P = PressureChange()); + int Cost, PressureChange P = PressureChange()); + + void readyQueueVerboseDump(const RegPressureTracker &RPTracker, + SchedCandidate &Candidate, ReadyQueue &Q); #endif }; diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 20c4ab112b5f..3ffb9cffc6a6 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -21,14 +21,12 @@ // // //===----------------------------------------------------------------------===// -#include "llvm/PassSupport.h" #include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -37,14 +35,13 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/PassSupport.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include using namespace llvm; #define DEBUG_TYPE "hexagon-nvj" @@ -87,12 +84,16 @@ namespace { } bool runOnMachineFunction(MachineFunction &Fn) 
override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } private: /// \brief A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; - bool isNewValueJumpCandidate(const MachineInstr *MI) const; + bool isNewValueJumpCandidate(const MachineInstr &MI) const; }; } // end of anonymous namespace @@ -116,7 +117,7 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, MachineFunction &MF) { // Predicated instruction can not be feeder to NVJ. - if (QII->isPredicated(II)) + if (QII->isPredicated(*II)) return false; // Bail out if feederReg is a paired register (double regs in @@ -219,25 +220,24 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, MachineBasicBlock::iterator end, MachineFunction &MF) { - MachineInstr *MI = II; + MachineInstr &MI = *II; // If the second operand of the compare is an imm, make sure it's in the // range specified by the arch. if (!secondReg) { - int64_t v = MI->getOperand(2).getImm(); + int64_t v = MI.getOperand(2).getImm(); - if (!(isUInt<5>(v) || - ((MI->getOpcode() == Hexagon::C2_cmpeqi || - MI->getOpcode() == Hexagon::C2_cmpgti) && - (v == -1)))) + if (!(isUInt<5>(v) || ((MI.getOpcode() == Hexagon::C2_cmpeqi || + MI.getOpcode() == Hexagon::C2_cmpgti) && + (v == -1)))) return false; } unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning. 
- cmpReg1 = MI->getOperand(1).getReg(); + cmpReg1 = MI.getOperand(1).getReg(); if (secondReg) { - cmpOp2 = MI->getOperand(2).getReg(); + cmpOp2 = MI.getOperand(2).getReg(); // Make sure that that second register is not from COPY // At machine code level, we don't need this, but if we decide @@ -367,22 +367,22 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, return 0; } -bool HexagonNewValueJump::isNewValueJumpCandidate(const MachineInstr *MI) - const { - switch (MI->getOpcode()) { - case Hexagon::C2_cmpeq: - case Hexagon::C2_cmpeqi: - case Hexagon::C2_cmpgt: - case Hexagon::C2_cmpgti: - case Hexagon::C2_cmpgtu: - case Hexagon::C2_cmpgtui: - case Hexagon::C4_cmpneq: - case Hexagon::C4_cmplte: - case Hexagon::C4_cmplteu: - return true; - - default: - return false; +bool HexagonNewValueJump::isNewValueJumpCandidate( + const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtui: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + return true; + + default: + return false; } } @@ -393,6 +393,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { << "********** Function: " << MF.getName() << "\n"); + if (skipFunction(*MF.getFunction())) + return false; + // If we move NewValueJump before register allocation we'll need live variable // analysis here too. 
@@ -435,28 +438,27 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { // Traverse the basic block - bottom up for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin(); MII != E;) { - MachineInstr *MI = --MII; - if (MI->isDebugValue()) { + MachineInstr &MI = *--MII; + if (MI.isDebugValue()) { continue; } if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated)) break; - DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n"); + DEBUG(dbgs() << "Instr: "; MI.dump(); dbgs() << "\n"); - if (!foundJump && - (MI->getOpcode() == Hexagon::J2_jumpt || - MI->getOpcode() == Hexagon::J2_jumpf || - MI->getOpcode() == Hexagon::J2_jumptnewpt || - MI->getOpcode() == Hexagon::J2_jumptnew || - MI->getOpcode() == Hexagon::J2_jumpfnewpt || - MI->getOpcode() == Hexagon::J2_jumpfnew)) { + if (!foundJump && (MI.getOpcode() == Hexagon::J2_jumpt || + MI.getOpcode() == Hexagon::J2_jumpf || + MI.getOpcode() == Hexagon::J2_jumptnewpt || + MI.getOpcode() == Hexagon::J2_jumptnew || + MI.getOpcode() == Hexagon::J2_jumpfnewpt || + MI.getOpcode() == Hexagon::J2_jumpfnew)) { // This is where you would insert your compare and // instr that feeds compare jmpPos = MII; - jmpInstr = MI; - predReg = MI->getOperand(0).getReg(); + jmpInstr = &MI; + predReg = MI.getOperand(0).getReg(); afterRA = TargetRegisterInfo::isPhysicalRegister(predReg); // If ifconverter had not messed up with the kill flags of the @@ -485,11 +487,13 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { if (predLive) break; - jmpTarget = MI->getOperand(1).getMBB(); + if (!MI.getOperand(1).isMBB()) + continue; + jmpTarget = MI.getOperand(1).getMBB(); foundJump = true; - if (MI->getOpcode() == Hexagon::J2_jumpf || - MI->getOpcode() == Hexagon::J2_jumpfnewpt || - MI->getOpcode() == Hexagon::J2_jumpfnew) { + if (MI.getOpcode() == Hexagon::J2_jumpf || + MI.getOpcode() == Hexagon::J2_jumpfnewpt || + MI.getOpcode() == Hexagon::J2_jumpfnew) { invertPredicate = true; } continue; @@ 
-498,41 +502,40 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { // No new value jump if there is a barrier. A barrier has to be in its // own packet. A barrier has zero operands. We conservatively bail out // here if we see any instruction with zero operands. - if (foundJump && MI->getNumOperands() == 0) + if (foundJump && MI.getNumOperands() == 0) break; - if (foundJump && - !foundCompare && - MI->getOperand(0).isReg() && - MI->getOperand(0).getReg() == predReg) { + if (foundJump && !foundCompare && MI.getOperand(0).isReg() && + MI.getOperand(0).getReg() == predReg) { // Not all compares can be new value compare. Arch Spec: 7.6.1.1 if (isNewValueJumpCandidate(MI)) { - assert((MI->getDesc().isCompare()) && + assert( + (MI.getDesc().isCompare()) && "Only compare instruction can be collapsed into New Value Jump"); - isSecondOpReg = MI->getOperand(2).isReg(); + isSecondOpReg = MI.getOperand(2).isReg(); if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg, afterRA, jmpPos, MF)) break; - cmpInstr = MI; + cmpInstr = &MI; cmpPos = MII; foundCompare = true; // We need cmpReg1 and cmpOp2(imm or reg) while building // new value jump instruction. 
- cmpReg1 = MI->getOperand(1).getReg(); - if (MI->getOperand(1).isKill()) + cmpReg1 = MI.getOperand(1).getReg(); + if (MI.getOperand(1).isKill()) MO1IsKill = true; if (isSecondOpReg) { - cmpOp2 = MI->getOperand(2).getReg(); - if (MI->getOperand(2).isKill()) + cmpOp2 = MI.getOperand(2).getReg(); + if (MI.getOperand(2).isKill()) MO2IsKill = true; } else - cmpOp2 = MI->getOperand(2).getImm(); + cmpOp2 = MI.getOperand(2).getImm(); continue; } } @@ -545,13 +548,12 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { bool foundFeeder = false; MachineBasicBlock::iterator feederPos = MII; - if (MI->getOperand(0).isReg() && - MI->getOperand(0).isDef() && - (MI->getOperand(0).getReg() == cmpReg1 || - (isSecondOpReg && - MI->getOperand(0).getReg() == (unsigned) cmpOp2))) { + if (MI.getOperand(0).isReg() && MI.getOperand(0).isDef() && + (MI.getOperand(0).getReg() == cmpReg1 || + (isSecondOpReg && + MI.getOperand(0).getReg() == (unsigned)cmpOp2))) { - unsigned feederReg = MI->getOperand(0).getReg(); + unsigned feederReg = MI.getOperand(0).getReg(); // First try to see if we can get the feeder from the first operand // of the compare. If we can not, and if secondOpReg is true @@ -600,15 +602,15 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { // the operands of the feeder. 
bool updatedIsKill = false; - for (unsigned i = 0; i < MI->getNumOperands(); i++) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0; i < MI.getNumOperands(); i++) { + MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isUse()) { unsigned feederReg = MO.getReg(); for (MachineBasicBlock::iterator localII = feederPos, end = jmpPos; localII != end; localII++) { - MachineInstr *localMI = localII; - for (unsigned j = 0; j < localMI->getNumOperands(); j++) { - MachineOperand &localMO = localMI->getOperand(j); + MachineInstr &localMI = *localII; + for (unsigned j = 0; j < localMI.getNumOperands(); j++) { + MachineOperand &localMO = localMI.getOperand(j); if (localMO.isReg() && localMO.isUse() && localMO.isKill() && feederReg == localMO.getReg()) { // We found that there is kill of a use register @@ -625,12 +627,12 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { if (updatedIsKill) break; } - MBB->splice(jmpPos, MI->getParent(), MI); - MBB->splice(jmpPos, MI->getParent(), cmpInstr); - DebugLoc dl = MI->getDebugLoc(); + MBB->splice(jmpPos, MI.getParent(), MI); + MBB->splice(jmpPos, MI.getParent(), cmpInstr); + DebugLoc dl = MI.getDebugLoc(); MachineInstr *NewMI; - assert((isNewValueJumpCandidate(cmpInstr)) && + assert((isNewValueJumpCandidate(*cmpInstr)) && "This compare is not a New Value Jump candidate."); unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2, isSecondOpNewified, diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td index fbd29cd4d6d1..11092d2b92fe 100644 --- a/lib/Target/Hexagon/HexagonOperands.td +++ b/lib/Target/Hexagon/HexagonOperands.td @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// def s32ImmOperand : AsmOperandClass { let Name = "s32Imm"; } +def s23_2ImmOperand : AsmOperandClass { let Name = "s23_2Imm"; } def s8ImmOperand : AsmOperandClass { let Name = "s8Imm"; } def s8Imm64Operand : AsmOperandClass { let Name = 
"s8Imm64"; } def s6ImmOperand : AsmOperandClass { let Name = "s6Imm"; } @@ -48,6 +49,7 @@ let OperandType = "OPERAND_IMMEDIATE", DecoderMethod = "unsignedImmDecoder" in { def s32Imm : Operand { let ParserMatchClass = s32ImmOperand; let DecoderMethod = "s32ImmDecoder"; } + def s23_2Imm : Operand { let ParserMatchClass = s23_2ImmOperand; } def s8Imm : Operand { let ParserMatchClass = s8ImmOperand; let DecoderMethod = "s8ImmDecoder"; } def s8Imm64 : Operand { let ParserMatchClass = s8Imm64Operand; @@ -345,22 +347,6 @@ def u1ImmPred32 : PatLeaf<(i32 imm), [{ return isUInt<1>(v); }]>; -def m5BImmPred : PatLeaf<(i32 imm), [{ - // m5BImmPred predicate - True if the (char) number is in range -1 .. -31 - // and will fit in a 5 bit field when made positive, for use in memops. - // this is specific to the zero extending of a negative by CombineInstr - int8_t v = (int8_t)N->getSExtValue(); - return (-31 <= v && v <= -1); -}]>; - -def m5HImmPred : PatLeaf<(i32 imm), [{ - // m5HImmPred predicate - True if the (short) number is in range -1 .. -31 - // and will fit in a 5 bit field when made positive, for use in memops. - // this is specific to the zero extending of a negative by CombineInstr - int16_t v = (int16_t)N->getSExtValue(); - return (-31 <= v && v <= -1); -}]>; - def m5ImmPred : PatLeaf<(i32 imm), [{ // m5ImmPred predicate - True if the number is in range -1 .. -31 // and will fit in a 5 bit field when made positive, for use in memops. @@ -402,60 +388,6 @@ def Clr5ImmPred : PatLeaf<(i32 imm), [{ return ImmIsSingleBit(v); }]>; -def SetClr5ImmPred : PatLeaf<(i32 imm), [{ - // True if the immediate is in range 0..31. - int32_t v = (int32_t)N->getSExtValue(); - return (v >= 0 && v <= 31); -}]>; - -def Set4ImmPred : PatLeaf<(i32 imm), [{ - // Set4ImmPred predicate - True if the number is in the series of values: - // [ 2^0, 2^1, ... 2^15 ]. - // For use in setbit immediate. - uint16_t v = (int16_t)N->getSExtValue(); - // Constrain to 16 bits, and then check for single bit. 
- return ImmIsSingleBit(v); -}]>; - -def Clr4ImmPred : PatLeaf<(i32 imm), [{ - // Clr4ImmPred predicate - True if the number is in the series of - // bit negated values: - // [ 2^0, 2^1, ... 2^15 ]. - // For use in setbit and clrbit immediate. - uint16_t v = ~ (int16_t)N->getSExtValue(); - // Constrain to 16 bits, and then check for single bit. - return ImmIsSingleBit(v); -}]>; - -def SetClr4ImmPred : PatLeaf<(i32 imm), [{ - // True if the immediate is in the range 0..15. - int16_t v = (int16_t)N->getSExtValue(); - return (v >= 0 && v <= 15); -}]>; - -def Set3ImmPred : PatLeaf<(i32 imm), [{ - // True if the number is in the series of values: [ 2^0, 2^1, ... 2^7 ]. - // For use in setbit immediate. - uint8_t v = (int8_t)N->getSExtValue(); - // Constrain to 8 bits, and then check for single bit. - return ImmIsSingleBit(v); -}]>; - -def Clr3ImmPred : PatLeaf<(i32 imm), [{ - // True if the number is in the series of bit negated values: [ 2^0, 2^1, ... 2^7 ]. - // For use in setbit and clrbit immediate. - uint8_t v = ~ (int8_t)N->getSExtValue(); - // Constrain to 8 bits, and then check for single bit. - return ImmIsSingleBit(v); -}]>; - -def SetClr3ImmPred : PatLeaf<(i32 imm), [{ - // True if the immediate is in the range 0..7. - int8_t v = (int8_t)N->getSExtValue(); - return (v >= 0 && v <= 7); -}]>; - - // Extendable immediate operands. def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; } def s16ExtOperand : AsmOperandClass { let Name = "s16Ext"; } diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp new file mode 100644 index 000000000000..4dff0dbc2b71 --- /dev/null +++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -0,0 +1,663 @@ +//===--- HexagonOptAddrMode.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// This implements a Hexagon-specific pass to optimize addressing mode for +// load/store instructions. +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "opt-addr-mode" + +#include "HexagonTargetMachine.h" +#include "RDFGraph.h" +#include "RDFLiveness.h" + +#include "llvm/ADT/DenseSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +static cl::opt CodeGrowthLimit("hexagon-amode-growth-limit", + cl::Hidden, cl::init(0), cl::desc("Code growth limit for address mode " + "optimization")); + +using namespace llvm; +using namespace rdf; + +namespace llvm { + FunctionPass *createHexagonOptAddrMode(); + void initializeHexagonOptAddrModePass(PassRegistry &); +} + +namespace { +class HexagonOptAddrMode : public MachineFunctionPass { +public: + static char ID; + HexagonOptAddrMode() + : MachineFunctionPass(ID), HII(0), MDT(0), DFG(0), LV(0) { + PassRegistry &R = *PassRegistry::getPassRegistry(); + initializeHexagonOptAddrModePass(R); + } + const char *getPassName() const override { + return "Optimize addressing mode of load/store"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); + } + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + typedef DenseSet MISetType; + typedef DenseMap InstrEvalMap; + 
const HexagonInstrInfo *HII; + MachineDominatorTree *MDT; + DataFlowGraph *DFG; + DataFlowGraph::DefStackMap DefM; + std::map> RDefMap; + Liveness *LV; + MISetType Deleted; + + bool processBlock(NodeAddr BA); + bool xformUseMI(MachineInstr *TfrMI, MachineInstr *UseMI, + NodeAddr UseN, unsigned UseMOnum); + bool analyzeUses(unsigned DefR, const NodeList &UNodeList, + InstrEvalMap &InstrEvalResult, short &SizeInc); + bool hasRepForm(MachineInstr *MI, unsigned TfrDefR); + bool canRemoveAddasl(NodeAddr AddAslSN, MachineInstr *MI, + const NodeList &UNodeList); + void getAllRealUses(NodeAddr SN, NodeList &UNodeList); + bool allValidCandidates(NodeAddr SA, NodeList &UNodeList); + short getBaseWithLongOffset(const MachineInstr *MI) const; + void updateMap(NodeAddr IA); + bool constructDefMap(MachineBasicBlock *B); + bool changeStore(MachineInstr *OldMI, MachineOperand ImmOp, + unsigned ImmOpNum); + bool changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, unsigned ImmOpNum); + bool changeAddAsl(NodeAddr AddAslUN, MachineInstr *AddAslMI, + const MachineOperand &ImmOp, unsigned ImmOpNum); +}; +} + +char HexagonOptAddrMode::ID = 0; + +INITIALIZE_PASS_BEGIN(HexagonOptAddrMode, "opt-amode", + "Optimize addressing mode", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) +INITIALIZE_PASS_END(HexagonOptAddrMode, "opt-amode", "Optimize addressing mode", + false, false) + +bool HexagonOptAddrMode::hasRepForm(MachineInstr *MI, unsigned TfrDefR) { + const MCInstrDesc &MID = MI->getDesc(); + + if ((!MID.mayStore() && !MID.mayLoad()) || HII->isPredicated(*MI)) + return false; + + if (MID.mayStore()) { + MachineOperand StOp = MI->getOperand(MI->getNumOperands() - 1); + if (StOp.isReg() && StOp.getReg() == TfrDefR) + return false; + } + + if (HII->getAddrMode(MI) == HexagonII::BaseRegOffset) + // Tranform to Absolute plus register offset. 
+ return (HII->getBaseWithLongOffset(MI) >= 0); + else if (HII->getAddrMode(MI) == HexagonII::BaseImmOffset) + // Tranform to absolute addressing mode. + return (HII->getAbsoluteForm(MI) >= 0); + + return false; +} + +// Check if addasl instruction can be removed. This is possible only +// if it's feeding to only load/store instructions with base + register +// offset as these instruction can be tranformed to use 'absolute plus +// shifted register offset'. +// ex: +// Rs = ##foo +// Rx = addasl(Rs, Rt, #2) +// Rd = memw(Rx + #28) +// Above three instructions can be replaced with Rd = memw(Rt<<#2 + ##foo+28) + +bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr AddAslSN, + MachineInstr *MI, + const NodeList &UNodeList) { + // check offset size in addasl. if 'offset > 3' return false + const MachineOperand &OffsetOp = MI->getOperand(3); + if (!OffsetOp.isImm() || OffsetOp.getImm() > 3) + return false; + + unsigned OffsetReg = MI->getOperand(2).getReg(); + RegisterRef OffsetRR; + NodeId OffsetRegRD = 0; + for (NodeAddr UA : AddAslSN.Addr->members_if(DFG->IsUse, *DFG)) { + RegisterRef RR = UA.Addr->getRegRef(); + if (OffsetReg == RR.Reg) { + OffsetRR = RR; + OffsetRegRD = UA.Addr->getReachingDef(); + } + } + + for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { + NodeAddr UA = *I; + NodeAddr IA = UA.Addr->getOwner(*DFG); + if ((UA.Addr->getFlags() & NodeAttrs::PhiRef) || + RDefMap[OffsetRR][IA.Id] != OffsetRegRD) + return false; + + MachineInstr *UseMI = NodeAddr(IA).Addr->getCode(); + NodeAddr OffsetRegDN = DFG->addr(OffsetRegRD); + // Reaching Def to an offset register can't be a phi. 
+ if ((OffsetRegDN.Addr->getFlags() & NodeAttrs::PhiRef) && + MI->getParent() != UseMI->getParent()) + return false; + + const MCInstrDesc &UseMID = UseMI->getDesc(); + if ((!UseMID.mayLoad() && !UseMID.mayStore()) || + HII->getAddrMode(UseMI) != HexagonII::BaseImmOffset || + getBaseWithLongOffset(UseMI) < 0) + return false; + + // Addasl output can't be a store value. + if (UseMID.mayStore() && UseMI->getOperand(2).isReg() && + UseMI->getOperand(2).getReg() == MI->getOperand(0).getReg()) + return false; + + for (auto &Mo : UseMI->operands()) + if (Mo.isFI()) + return false; + } + return true; +} + +bool HexagonOptAddrMode::allValidCandidates(NodeAddr SA, + NodeList &UNodeList) { + for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { + NodeAddr UN = *I; + RegisterRef UR = UN.Addr->getRegRef(); + NodeSet Visited, Defs; + const auto &ReachingDefs = LV->getAllReachingDefsRec(UR, UN, Visited, Defs); + if (ReachingDefs.size() > 1) { + DEBUG({ + dbgs() << "*** Multiple Reaching Defs found!!! 
***\n"; + for (auto DI : ReachingDefs) { + NodeAddr DA = DFG->addr(DI); + NodeAddr TempIA = DA.Addr->getOwner(*DFG); + dbgs() << "\t\t[Reaching Def]: " + << Print>(TempIA, *DFG) << "\n"; + } + }); + return false; + } + } + return true; +} + +void HexagonOptAddrMode::getAllRealUses(NodeAddr SA, + NodeList &UNodeList) { + for (NodeAddr DA : SA.Addr->members_if(DFG->IsDef, *DFG)) { + DEBUG(dbgs() << "\t\t[DefNode]: " << Print>(DA, *DFG) + << "\n"); + RegisterRef DR = DA.Addr->getRegRef(); + auto UseSet = LV->getAllReachedUses(DR, DA); + + for (auto UI : UseSet) { + NodeAddr UA = DFG->addr(UI); + DEBUG({ + NodeAddr TempIA = UA.Addr->getOwner(*DFG); + dbgs() << "\t\t\t[Reached Use]: " + << Print>(TempIA, *DFG) << "\n"; + }); + + if (UA.Addr->getFlags() & NodeAttrs::PhiRef) { + NodeAddr PA = UA.Addr->getOwner(*DFG); + NodeId id = PA.Id; + const Liveness::RefMap &phiUse = LV->getRealUses(id); + DEBUG(dbgs() << "\t\t\t\tphi real Uses" + << Print(phiUse, *DFG) << "\n"); + if (phiUse.size() > 0) { + for (auto I : phiUse) { + if (DR != I.first) + continue; + auto phiUseSet = I.second; + for (auto phiUI : phiUseSet) { + NodeAddr phiUA = DFG->addr(phiUI); + UNodeList.push_back(phiUA); + } + } + } + } else + UNodeList.push_back(UA); + } + } +} + +bool HexagonOptAddrMode::analyzeUses(unsigned tfrDefR, + const NodeList &UNodeList, + InstrEvalMap &InstrEvalResult, + short &SizeInc) { + bool KeepTfr = false; + bool HasRepInstr = false; + InstrEvalResult.clear(); + + for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { + bool CanBeReplaced = false; + NodeAddr UN = *I; + NodeAddr SN = UN.Addr->getOwner(*DFG); + MachineInstr *MI = SN.Addr->getCode(); + const MCInstrDesc &MID = MI->getDesc(); + if ((MID.mayLoad() || MID.mayStore())) { + if (!hasRepForm(MI, tfrDefR)) { + KeepTfr = true; + continue; + } + SizeInc++; + CanBeReplaced = true; + } else if (MI->getOpcode() == Hexagon::S2_addasl_rrri) { + NodeList AddaslUseList; + + DEBUG(dbgs() << "\nGetting ReachedUses for 
=== " << *MI << "\n"); + getAllRealUses(SN, AddaslUseList); + // Process phi nodes. + if (allValidCandidates(SN, AddaslUseList) && + canRemoveAddasl(SN, MI, AddaslUseList)) { + SizeInc += AddaslUseList.size(); + SizeInc -= 1; // Reduce size by 1 as addasl itself can be removed. + CanBeReplaced = true; + } else + SizeInc++; + } else + // Currently, only load/store and addasl are handled. + // Some other instructions to consider - + // A2_add -> A2_addi + // M4_mpyrr_addr -> M4_mpyrr_addi + KeepTfr = true; + + InstrEvalResult[MI] = CanBeReplaced; + HasRepInstr |= CanBeReplaced; + } + + // Reduce total size by 2 if original tfr can be deleted. + if (!KeepTfr) + SizeInc -= 2; + + return HasRepInstr; +} + +bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, + unsigned ImmOpNum) { + bool Changed = false; + MachineBasicBlock *BB = OldMI->getParent(); + auto UsePos = MachineBasicBlock::iterator(OldMI); + MachineBasicBlock::instr_iterator InsertPt = UsePos.getInstrIterator(); + ++InsertPt; + unsigned OpStart; + unsigned OpEnd = OldMI->getNumOperands(); + MachineInstrBuilder MIB; + + if (ImmOpNum == 1) { + if (HII->getAddrMode(OldMI) == HexagonII::BaseRegOffset) { + short NewOpCode = HII->getBaseWithLongOffset(OldMI); + assert(NewOpCode >= 0 && "Invalid New opcode\n"); + MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); + MIB.addOperand(OldMI->getOperand(0)); + MIB.addOperand(OldMI->getOperand(2)); + MIB.addOperand(OldMI->getOperand(3)); + MIB.addOperand(ImmOp); + OpStart = 4; + Changed = true; + } else if (HII->getAddrMode(OldMI) == HexagonII::BaseImmOffset) { + short NewOpCode = HII->getAbsoluteForm(OldMI); + assert(NewOpCode >= 0 && "Invalid New opcode\n"); + MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)) + .addOperand(OldMI->getOperand(0)); + const GlobalValue *GV = ImmOp.getGlobal(); + int64_t Offset = ImmOp.getOffset() + OldMI->getOperand(2).getImm(); + + MIB.addGlobalAddress(GV, Offset, 
ImmOp.getTargetFlags()); + OpStart = 3; + Changed = true; + } else + Changed = false; + + DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); + DEBUG(dbgs() << "[TO]: " << MIB << "\n"); + } else if (ImmOpNum == 2 && OldMI->getOperand(3).getImm() == 0) { + short NewOpCode = HII->xformRegToImmOffset(OldMI); + assert(NewOpCode >= 0 && "Invalid New opcode\n"); + MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); + MIB.addOperand(OldMI->getOperand(0)); + MIB.addOperand(OldMI->getOperand(1)); + MIB.addOperand(ImmOp); + OpStart = 4; + Changed = true; + DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); + DEBUG(dbgs() << "[TO]: " << MIB << "\n"); + } + + if (Changed) + for (unsigned i = OpStart; i < OpEnd; ++i) + MIB.addOperand(OldMI->getOperand(i)); + + return Changed; +} + +bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, + unsigned ImmOpNum) { + bool Changed = false; + unsigned OpStart; + unsigned OpEnd = OldMI->getNumOperands(); + MachineBasicBlock *BB = OldMI->getParent(); + auto UsePos = MachineBasicBlock::iterator(OldMI); + MachineBasicBlock::instr_iterator InsertPt = UsePos.getInstrIterator(); + ++InsertPt; + MachineInstrBuilder MIB; + if (ImmOpNum == 0) { + if (HII->getAddrMode(OldMI) == HexagonII::BaseRegOffset) { + short NewOpCode = HII->getBaseWithLongOffset(OldMI); + assert(NewOpCode >= 0 && "Invalid New opcode\n"); + MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); + MIB.addOperand(OldMI->getOperand(1)); + MIB.addOperand(OldMI->getOperand(2)); + MIB.addOperand(ImmOp); + MIB.addOperand(OldMI->getOperand(3)); + OpStart = 4; + } else if (HII->getAddrMode(OldMI) == HexagonII::BaseImmOffset) { + short NewOpCode = HII->getAbsoluteForm(OldMI); + assert(NewOpCode >= 0 && "Invalid New opcode\n"); + MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); + const GlobalValue *GV = ImmOp.getGlobal(); + int64_t Offset = ImmOp.getOffset() + OldMI->getOperand(1).getImm(); + 
MIB.addGlobalAddress(GV, Offset, ImmOp.getTargetFlags()); + MIB.addOperand(OldMI->getOperand(2)); + OpStart = 3; + } + Changed = true; + DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); + DEBUG(dbgs() << "[TO]: " << MIB << "\n"); + } else if (ImmOpNum == 1 && OldMI->getOperand(2).getImm() == 0) { + short NewOpCode = HII->xformRegToImmOffset(OldMI); + assert(NewOpCode >= 0 && "Invalid New opcode\n"); + MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); + MIB.addOperand(OldMI->getOperand(0)); + MIB.addOperand(ImmOp); + MIB.addOperand(OldMI->getOperand(1)); + OpStart = 2; + Changed = true; + DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); + DEBUG(dbgs() << "[TO]: " << MIB << "\n"); + } + if (Changed) + for (unsigned i = OpStart; i < OpEnd; ++i) + MIB.addOperand(OldMI->getOperand(i)); + + return Changed; +} + +short HexagonOptAddrMode::getBaseWithLongOffset(const MachineInstr *MI) const { + if (HII->getAddrMode(MI) == HexagonII::BaseImmOffset) { + short TempOpCode = HII->getBaseWithRegOffset(MI); + return HII->getBaseWithLongOffset(TempOpCode); + } else + return HII->getBaseWithLongOffset(MI); +} + +bool HexagonOptAddrMode::changeAddAsl(NodeAddr AddAslUN, + MachineInstr *AddAslMI, + const MachineOperand &ImmOp, + unsigned ImmOpNum) { + NodeAddr SA = AddAslUN.Addr->getOwner(*DFG); + + DEBUG(dbgs() << "Processing addasl :" << *AddAslMI << "\n"); + + NodeList UNodeList; + getAllRealUses(SA, UNodeList); + + for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { + NodeAddr UseUN = *I; + assert(!(UseUN.Addr->getFlags() & NodeAttrs::PhiRef) && + "Can't transform this 'AddAsl' instruction!"); + + NodeAddr UseIA = UseUN.Addr->getOwner(*DFG); + DEBUG(dbgs() << "[InstrNode]: " << Print>(UseIA, *DFG) + << "\n"); + MachineInstr *UseMI = UseIA.Addr->getCode(); + DEBUG(dbgs() << "[MI getParent()->getNumber() + << ">]: " << *UseMI << "\n"); + const MCInstrDesc &UseMID = UseMI->getDesc(); + assert(HII->getAddrMode(UseMI) == 
HexagonII::BaseImmOffset); + + auto UsePos = MachineBasicBlock::iterator(UseMI); + MachineBasicBlock::instr_iterator InsertPt = UsePos.getInstrIterator(); + short NewOpCode = getBaseWithLongOffset(UseMI); + assert(NewOpCode >= 0 && "Invalid New opcode\n"); + + unsigned OpStart; + unsigned OpEnd = UseMI->getNumOperands(); + + MachineBasicBlock *BB = UseMI->getParent(); + MachineInstrBuilder MIB = + BuildMI(*BB, InsertPt, UseMI->getDebugLoc(), HII->get(NewOpCode)); + // change mem(Rs + # ) -> mem(Rt << # + ##) + if (UseMID.mayLoad()) { + MIB.addOperand(UseMI->getOperand(0)); + MIB.addOperand(AddAslMI->getOperand(2)); + MIB.addOperand(AddAslMI->getOperand(3)); + const GlobalValue *GV = ImmOp.getGlobal(); + MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm(), + ImmOp.getTargetFlags()); + OpStart = 3; + } else if (UseMID.mayStore()) { + MIB.addOperand(AddAslMI->getOperand(2)); + MIB.addOperand(AddAslMI->getOperand(3)); + const GlobalValue *GV = ImmOp.getGlobal(); + MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm(), + ImmOp.getTargetFlags()); + MIB.addOperand(UseMI->getOperand(2)); + OpStart = 3; + } else + llvm_unreachable("Unhandled instruction"); + + for (unsigned i = OpStart; i < OpEnd; ++i) + MIB.addOperand(UseMI->getOperand(i)); + + Deleted.insert(UseMI); + } + + return true; +} + +bool HexagonOptAddrMode::xformUseMI(MachineInstr *TfrMI, MachineInstr *UseMI, + NodeAddr UseN, + unsigned UseMOnum) { + const MachineOperand ImmOp = TfrMI->getOperand(1); + const MCInstrDesc &MID = UseMI->getDesc(); + unsigned Changed = false; + if (MID.mayLoad()) + Changed = changeLoad(UseMI, ImmOp, UseMOnum); + else if (MID.mayStore()) + Changed = changeStore(UseMI, ImmOp, UseMOnum); + else if (UseMI->getOpcode() == Hexagon::S2_addasl_rrri) + Changed = changeAddAsl(UseN, UseMI, ImmOp, UseMOnum); + + if (Changed) + Deleted.insert(UseMI); + + return Changed; +} + +bool HexagonOptAddrMode::processBlock(NodeAddr BA) { + bool Changed = false; + + for (auto IA : 
BA.Addr->members(*DFG)) { + if (!DFG->IsCode(IA)) + continue; + + NodeAddr SA = IA; + MachineInstr *MI = SA.Addr->getCode(); + if (MI->getOpcode() != Hexagon::A2_tfrsi || + !MI->getOperand(1).isGlobal()) + continue; + + DEBUG(dbgs() << "[Analyzing A2_tfrsi]: " << *MI << "\n"); + DEBUG(dbgs() << "\t[InstrNode]: " << Print>(IA, *DFG) + << "\n"); + + NodeList UNodeList; + getAllRealUses(SA, UNodeList); + + if (!allValidCandidates(SA, UNodeList)) + continue; + + short SizeInc = 0; + unsigned DefR = MI->getOperand(0).getReg(); + InstrEvalMap InstrEvalResult; + + // Analyze all uses and calculate increase in size. Perform the optimization + // only if there is no increase in size. + if (!analyzeUses(DefR, UNodeList, InstrEvalResult, SizeInc)) + continue; + if (SizeInc > CodeGrowthLimit) + continue; + + bool KeepTfr = false; + + DEBUG(dbgs() << "\t[Total reached uses] : " << UNodeList.size() << "\n"); + DEBUG(dbgs() << "\t[Processing Reached Uses] ===\n"); + for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { + NodeAddr UseN = *I; + assert(!(UseN.Addr->getFlags() & NodeAttrs::PhiRef) && + "Found a PhiRef node as a real reached use!!"); + + NodeAddr OwnerN = UseN.Addr->getOwner(*DFG); + MachineInstr *UseMI = OwnerN.Addr->getCode(); + DEBUG(dbgs() << "\t\t[MI getParent()->getNumber() + << ">]: " << *UseMI << "\n"); + + int UseMOnum = -1; + unsigned NumOperands = UseMI->getNumOperands(); + for (unsigned j = 0; j < NumOperands - 1; ++j) { + const MachineOperand &op = UseMI->getOperand(j); + if (op.isReg() && op.isUse() && DefR == op.getReg()) + UseMOnum = j; + } + assert(UseMOnum >= 0 && "Invalid reached use!"); + + if (InstrEvalResult[UseMI]) + // Change UseMI if replacement is possible. 
+ Changed |= xformUseMI(MI, UseMI, UseN, UseMOnum); + else + KeepTfr = true; + } + if (!KeepTfr) + Deleted.insert(MI); + } + return Changed; +} + +void HexagonOptAddrMode::updateMap(NodeAddr IA) { + RegisterSet RRs; + for (NodeAddr RA : IA.Addr->members(*DFG)) + RRs.insert(RA.Addr->getRegRef()); + bool Common = false; + for (auto &R : RDefMap) { + if (!RRs.count(R.first)) + continue; + Common = true; + break; + } + if (!Common) + return; + + for (auto &R : RDefMap) { + auto F = DefM.find(R.first); + if (F == DefM.end() || F->second.empty()) + continue; + R.second[IA.Id] = F->second.top()->Id; + } +} + +bool HexagonOptAddrMode::constructDefMap(MachineBasicBlock *B) { + bool Changed = false; + auto BA = DFG->getFunc().Addr->findBlock(B, *DFG); + DFG->markBlock(BA.Id, DefM); + + for (NodeAddr IA : BA.Addr->members(*DFG)) { + updateMap(IA); + DFG->pushDefs(IA, DefM); + } + + MachineDomTreeNode *N = MDT->getNode(B); + for (auto I : *N) + Changed |= constructDefMap(I->getBlock()); + + DFG->releaseBlock(BA.Id, DefM); + return Changed; +} + +bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + auto &HST = MF.getSubtarget(); + auto &MRI = MF.getRegInfo(); + HII = HST.getInstrInfo(); + const auto &MDF = getAnalysis(); + MDT = &getAnalysis(); + const auto &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetOperandInfo TOI(*HII); + + RegisterAliasInfo RAI(TRI); + DataFlowGraph G(MF, *HII, TRI, *MDT, MDF, RAI, TOI); + G.build(); + DFG = &G; + + Liveness L(MRI, *DFG); + L.computePhiInfo(); + LV = &L; + + constructDefMap(&DFG->getMF().front()); + + Deleted.clear(); + NodeAddr FA = DFG->getFunc(); + DEBUG(dbgs() << "==== [RefMap#]=====:\n " + << Print>(FA, *DFG) << "\n"); + + for (NodeAddr BA : FA.Addr->members(*DFG)) + Changed |= processBlock(BA); + + for (auto MI : Deleted) + MI->eraseFromParent(); + + if (Changed) { + G.build(); + L.computeLiveIns(); + L.resetLiveIns(); + L.resetKills(); + } + + return Changed; +} + 
+//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonOptAddrMode() { + return new HexagonOptAddrMode(); +} diff --git a/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp index 1723771550c9..7937a7908b06 100644 --- a/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp +++ b/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp @@ -69,6 +69,9 @@ bool HexagonOptimizeSZextends::intrinsicAlreadySextended(Intrinsic::ID IntID) { } bool HexagonOptimizeSZextends::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + unsigned Idx = 1; // Try to optimize sign extends in formal parameters. It's relying on // callee already sign extending the values. I'm not sure if our ABI diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index e68ff85b1da6..b064decc5c76 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -67,11 +67,11 @@ static cl::opt DisablePNotP("disable-hexagon-pnotp", cl::desc("Disable Optimization of PNotP")); static cl::opt DisableOptSZExt("disable-hexagon-optszext", - cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Disable Optimization of Sign/Zero Extends")); static cl::opt DisableOptExtTo64("disable-hexagon-opt-ext-to-64", - cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Disable Optimization of extensions to i64.")); namespace llvm { @@ -112,6 +112,9 @@ INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole", false, false) bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + QII = static_cast(MF.getSubtarget().getInstrInfo()); QRI = 
MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); @@ -129,15 +132,13 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { PeepholeDoubleRegsMap.clear(); // Traverse the basic block. - for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); - ++MII) { - MachineInstr *MI = MII; + for (MachineInstr &MI : *MBB) { // Look for sign extends: // %vreg170 = SXTW %vreg166 - if (!DisableOptSZExt && MI->getOpcode() == Hexagon::A2_sxtw) { - assert (MI->getNumOperands() == 2); - MachineOperand &Dst = MI->getOperand(0); - MachineOperand &Src = MI->getOperand(1); + if (!DisableOptSZExt && MI.getOpcode() == Hexagon::A2_sxtw) { + assert(MI.getNumOperands() == 2); + MachineOperand &Dst = MI.getOperand(0); + MachineOperand &Src = MI.getOperand(1); unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src.getReg(); // Just handle virtual registers. @@ -152,12 +153,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // Look for %vreg170 = COMBINE_ir_V4 (0, %vreg169) // %vreg170:DoublRegs, %vreg169:IntRegs - if (!DisableOptExtTo64 && - MI->getOpcode () == Hexagon::A4_combineir) { - assert (MI->getNumOperands() == 3); - MachineOperand &Dst = MI->getOperand(0); - MachineOperand &Src1 = MI->getOperand(1); - MachineOperand &Src2 = MI->getOperand(2); + if (!DisableOptExtTo64 && MI.getOpcode() == Hexagon::A4_combineir) { + assert(MI.getNumOperands() == 3); + MachineOperand &Dst = MI.getOperand(0); + MachineOperand &Src1 = MI.getOperand(1); + MachineOperand &Src2 = MI.getOperand(2); if (Src1.getImm() != 0) continue; unsigned DstReg = Dst.getReg(); @@ -170,11 +170,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg. // and convert into // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg. 
- if (MI->getOpcode() == Hexagon::S2_lsr_i_p) { - assert(MI->getNumOperands() == 3); - MachineOperand &Dst = MI->getOperand(0); - MachineOperand &Src1 = MI->getOperand(1); - MachineOperand &Src2 = MI->getOperand(2); + if (MI.getOpcode() == Hexagon::S2_lsr_i_p) { + assert(MI.getNumOperands() == 3); + MachineOperand &Dst = MI.getOperand(0); + MachineOperand &Src1 = MI.getOperand(1); + MachineOperand &Src2 = MI.getOperand(2); if (Src2.getImm() != 32) continue; unsigned DstReg = Dst.getReg(); @@ -184,11 +184,10 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { } // Look for P=NOT(P). - if (!DisablePNotP && - (MI->getOpcode() == Hexagon::C2_not)) { - assert (MI->getNumOperands() == 2); - MachineOperand &Dst = MI->getOperand(0); - MachineOperand &Src = MI->getOperand(1); + if (!DisablePNotP && MI.getOpcode() == Hexagon::C2_not) { + assert(MI.getNumOperands() == 2); + MachineOperand &Dst = MI.getOperand(0); + MachineOperand &Src = MI.getOperand(1); unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src.getReg(); // Just handle virtual registers. @@ -203,10 +202,10 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // Look for copy: // %vreg176 = COPY %vreg170:subreg_loreg - if (!DisableOptSZExt && MI->isCopy()) { - assert (MI->getNumOperands() == 2); - MachineOperand &Dst = MI->getOperand(0); - MachineOperand &Src = MI->getOperand(1); + if (!DisableOptSZExt && MI.isCopy()) { + assert(MI.getNumOperands() == 2); + MachineOperand &Dst = MI.getOperand(0); + MachineOperand &Src = MI.getOperand(1); // Make sure we are copying the lower 32 bits. if (Src.getSubReg() != Hexagon::subreg_loreg) @@ -219,22 +218,18 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // Try to find in the map. if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) { // Change the 1st operand. 
- MI->RemoveOperand(1); - MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false)); + MI.RemoveOperand(1); + MI.addOperand(MachineOperand::CreateReg(PeepholeSrc, false)); } else { DenseMap >::iterator DI = PeepholeDoubleRegsMap.find(SrcReg); if (DI != PeepholeDoubleRegsMap.end()) { std::pair PeepholeSrc = DI->second; - MI->RemoveOperand(1); - MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first, - false /*isDef*/, - false /*isImp*/, - false /*isKill*/, - false /*isDead*/, - false /*isUndef*/, - false /*isEarlyClobber*/, - PeepholeSrc.second)); + MI.RemoveOperand(1); + MI.addOperand(MachineOperand::CreateReg( + PeepholeSrc.first, false /*isDef*/, false /*isImp*/, + false /*isKill*/, false /*isDead*/, false /*isUndef*/, + false /*isEarlyClobber*/, PeepholeSrc.second)); } } } @@ -244,7 +239,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { if (!DisablePNotP) { bool Done = false; if (QII->isPredicated(MI)) { - MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op0 = MI.getOperand(0); unsigned Reg0 = Op0.getReg(); const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0); if (RC0->getID() == Hexagon::PredRegsRegClassID) { @@ -254,9 +249,9 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // Try to find in the map. if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) { // Change the 1st operand and, flip the opcode. - MI->getOperand(0).setReg(PeepholeSrc); - int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode()); - MI->setDesc(QII->get(NewOp)); + MI.getOperand(0).setReg(PeepholeSrc); + int NewOp = QII->getInvertedPredicatedOpcode(MI.getOpcode()); + MI.setDesc(QII->get(NewOp)); Done = true; } } @@ -265,7 +260,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { if (!Done) { // Handle special instructions. - unsigned Op = MI->getOpcode(); + unsigned Op = MI.getOpcode(); unsigned NewOp = 0; unsigned PR = 1, S1 = 2, S2 = 3; // Operand indices. 
@@ -282,15 +277,15 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { break; } if (NewOp) { - unsigned PSrc = MI->getOperand(PR).getReg(); + unsigned PSrc = MI.getOperand(PR).getReg(); if (unsigned POrig = PeepholeMap.lookup(PSrc)) { - MI->getOperand(PR).setReg(POrig); - MI->setDesc(QII->get(NewOp)); + MI.getOperand(PR).setReg(POrig); + MI.setDesc(QII->get(NewOp)); // Swap operands S1 and S2. - MachineOperand Op1 = MI->getOperand(S1); - MachineOperand Op2 = MI->getOperand(S2); - ChangeOpInto(MI->getOperand(S1), Op2); - ChangeOpInto(MI->getOperand(S2), Op1); + MachineOperand Op1 = MI.getOperand(S1); + MachineOperand Op2 = MI.getOperand(S2); + ChangeOpInto(MI.getOperand(S1), Op2); + ChangeOpInto(MI.getOperand(S2), Op1); } } // if (NewOp) } // if (!Done) @@ -308,6 +303,7 @@ void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) { case MachineOperand::MO_Register: if (Src.isReg()) { Dst.setReg(Src.getReg()); + Dst.setSubReg(Src.getSubReg()); } else if (Src.isImm()) { Dst.ChangeToImmediate(Src.getImm()); } else { @@ -322,6 +318,7 @@ void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) { Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(), Src.isKill(), Src.isDead(), Src.isUndef(), Src.isDebug()); + Dst.setSubReg(Src.getSubReg()); } else { llvm_unreachable("Unexpected src operand type"); } diff --git a/lib/Target/Hexagon/HexagonRDF.h b/lib/Target/Hexagon/HexagonRDF.h index 00c1889e8eb5..9a63150c377d 100644 --- a/lib/Target/Hexagon/HexagonRDF.h +++ b/lib/Target/Hexagon/HexagonRDF.h @@ -13,7 +13,6 @@ namespace llvm { class TargetRegisterInfo; -} namespace rdf { struct HexagonRegisterAliasInfo : public RegisterAliasInfo { @@ -22,7 +21,8 @@ namespace rdf { bool covers(RegisterRef RA, RegisterRef RR) const override; bool covers(const RegisterSet &RRs, RegisterRef RR) const override; }; -} +} // namespace rdf +} // namespace llvm #endif diff --git a/lib/Target/Hexagon/HexagonRDFOpt.cpp 
b/lib/Target/Hexagon/HexagonRDFOpt.cpp index 3fcda984d265..642a8785def9 100644 --- a/lib/Target/Hexagon/HexagonRDFOpt.cpp +++ b/lib/Target/Hexagon/HexagonRDFOpt.cpp @@ -35,8 +35,8 @@ namespace llvm { } namespace { - cl::opt RDFLimit("rdf-limit", cl::init(UINT_MAX)); unsigned RDFCount = 0; + cl::opt RDFLimit("rdf-limit", cl::init(UINT_MAX)); cl::opt RDFDump("rdf-dump", cl::init(false)); class HexagonRDFOpt : public MachineFunctionPass { @@ -55,6 +55,11 @@ namespace { } bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + static char ID; private: @@ -71,6 +76,13 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) INITIALIZE_PASS_END(HexagonRDFOpt, "rdfopt", "Hexagon RDF opt", false, false) +namespace { +struct HexagonCP : public CopyPropagation { + HexagonCP(DataFlowGraph &G) : CopyPropagation(G) {} + bool interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) override; +}; + + struct HexagonDCE : public DeadCodeElimination { HexagonDCE(DataFlowGraph &G, MachineRegisterInfo &MRI) : DeadCodeElimination(G, MRI) {} @@ -79,6 +91,44 @@ struct HexagonDCE : public DeadCodeElimination { bool run(); }; +} // end anonymous namespace + + +bool HexagonCP::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) { + auto mapRegs = [MI,&EM] (RegisterRef DstR, RegisterRef SrcR) -> void { + EM.insert(std::make_pair(DstR, SrcR)); + }; + + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::A2_combinew: { + const MachineOperand &DstOp = MI->getOperand(0); + const MachineOperand &HiOp = MI->getOperand(1); + const MachineOperand &LoOp = MI->getOperand(2); + assert(DstOp.getSubReg() == 0 && "Unexpected subregister"); + mapRegs({ DstOp.getReg(), Hexagon::subreg_hireg }, + { HiOp.getReg(), HiOp.getSubReg() }); + mapRegs({ DstOp.getReg(), Hexagon::subreg_loreg }, + { LoOp.getReg(), 
LoOp.getSubReg() }); + return true; + } + case Hexagon::A2_addi: { + const MachineOperand &A = MI->getOperand(2); + if (!A.isImm() || A.getImm() != 0) + return false; + } + // Fall through. + case Hexagon::A2_tfr: { + const MachineOperand &DstOp = MI->getOperand(0); + const MachineOperand &SrcOp = MI->getOperand(1); + mapRegs({ DstOp.getReg(), DstOp.getSubReg() }, + { SrcOp.getReg(), SrcOp.getSubReg() }); + return true; + } + } + + return CopyPropagation::interpretAsCopy(MI, EM); +} bool HexagonDCE::run() { @@ -106,6 +156,7 @@ bool HexagonDCE::run() { } } + // Nodes to remove. SetVector Remove = DeadInstrs; @@ -216,6 +267,9 @@ bool HexagonDCE::rewrite(NodeAddr IA, SetVector &Remove) { bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + if (RDFLimit.getPosition()) { if (RDFCount >= RDFLimit) return false; @@ -227,31 +281,36 @@ bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) { const auto &HII = *MF.getSubtarget().getInstrInfo(); const auto &HRI = *MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); - - HexagonRegisterAliasInfo HAI(HRI); - TargetOperandInfo TOI(HII); + bool Changed; if (RDFDump) MF.print(dbgs() << "Before " << getPassName() << "\n", nullptr); + + HexagonRegisterAliasInfo HAI(HRI); + TargetOperandInfo TOI(HII); DataFlowGraph G(MF, HII, HRI, *MDT, MDF, HAI, TOI); - G.build(); - if (RDFDump) { - dbgs() << PrintNode(G.getFunc(), G) << '\n'; - dbgs() << MF.getName() << '\n'; - } + // Dead phi nodes are necessary for copy propagation: we can add a use + // of a register in a block where it would need a phi node, but which + // was dead (and removed) during the graph build time. 
+ G.build(BuildOptions::KeepDeadPhis); - bool Changed; - CopyPropagation CP(G); + if (RDFDump) + dbgs() << "Starting copy propagation on: " << MF.getName() << '\n' + << PrintNode(G.getFunc(), G) << '\n'; + HexagonCP CP(G); CP.trace(RDFDump); Changed = CP.run(); - if (Changed) - G.build(); + if (RDFDump) + dbgs() << "Starting dead code elimination on: " << MF.getName() << '\n' + << PrintNode(G.getFunc(), G) << '\n'; HexagonDCE DCE(G, *MRI); DCE.trace(RDFDump); Changed |= DCE.run(); if (Changed) { + if (RDFDump) + dbgs() << "Starting liveness recomputation on: " << MF.getName() << '\n'; Liveness LV(*MRI, G); LV.trace(RDFDump); LV.computeLiveIns(); @@ -261,6 +320,7 @@ bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) { if (RDFDump) MF.print(dbgs() << "After " << getPassName() << "\n", nullptr); + return false; } @@ -268,5 +328,3 @@ bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) { FunctionPass *llvm::createHexagonRDFOpt() { return new HexagonRDFOpt(); } - - diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 6e5f7324aca8..23ebfd484be9 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -29,7 +29,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -54,24 +53,51 @@ bool HexagonRegisterInfo::isCalleeSaveReg(unsigned Reg) const { const MCPhysReg * -HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF) const { - static const MCPhysReg CallerSavedRegsV4[] = { - Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, - Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, - Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, - Hexagon::R15, 0 +HexagonRegisterInfo::getCallerSavedRegs(const 
MachineFunction *MF, + const TargetRegisterClass *RC) const { + using namespace Hexagon; + + static const MCPhysReg Int32[] = { + R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, 0 + }; + static const MCPhysReg Int64[] = { + D0, D1, D2, D3, D4, D5, D6, D7, 0 + }; + static const MCPhysReg Pred[] = { + P0, P1, P2, P3, 0 + }; + static const MCPhysReg VecSgl[] = { + V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, + V14, V15, V16, V17, V18, V19, V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31, 0 + }; + static const MCPhysReg VecDbl[] = { + W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, 0 }; - auto &HST = static_cast(MF->getSubtarget()); - switch (HST.getHexagonArchVersion()) { - case HexagonSubtarget::V4: - case HexagonSubtarget::V5: - case HexagonSubtarget::V55: - case HexagonSubtarget::V60: - return CallerSavedRegsV4; + switch (RC->getID()) { + case IntRegsRegClassID: + return Int32; + case DoubleRegsRegClassID: + return Int64; + case PredRegsRegClassID: + return Pred; + case VectorRegsRegClassID: + case VectorRegs128BRegClassID: + return VecSgl; + case VecDblRegsRegClassID: + case VecDblRegs128BRegClassID: + return VecDbl; + default: + break; } - llvm_unreachable( - "Callee saved registers requested for unknown archtecture version"); + + static const MCPhysReg Empty[] = { 0 }; +#ifndef NDEBUG + dbgs() << "Register class: " << getRegClassName(RC) << "\n"; +#endif + llvm_unreachable("Unexpected register class"); + return Empty; } @@ -83,33 +109,48 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 }; + // Functions that contain a call to __builtin_eh_return also save the first 4 + // parameter registers. 
+ static const MCPhysReg CalleeSavedRegsV3EHReturn[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, + Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, + Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 + }; + + bool HasEHReturn = MF->getInfo()->hasEHReturn(); + switch (MF->getSubtarget().getHexagonArchVersion()) { case HexagonSubtarget::V4: case HexagonSubtarget::V5: case HexagonSubtarget::V55: case HexagonSubtarget::V60: - return CalleeSavedRegsV3; + return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3; } + llvm_unreachable("Callee saved registers requested for unknown architecture " "version"); } + BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - Reserved.set(HEXAGON_RESERVED_REG_1); - Reserved.set(HEXAGON_RESERVED_REG_2); Reserved.set(Hexagon::R29); Reserved.set(Hexagon::R30); Reserved.set(Hexagon::R31); Reserved.set(Hexagon::PC); - Reserved.set(Hexagon::GP); Reserved.set(Hexagon::D14); Reserved.set(Hexagon::D15); Reserved.set(Hexagon::LC0); Reserved.set(Hexagon::LC1); Reserved.set(Hexagon::SA0); Reserved.set(Hexagon::SA1); + Reserved.set(Hexagon::UGP); + Reserved.set(Hexagon::GP); + Reserved.set(Hexagon::CS0); + Reserved.set(Hexagon::CS1); + Reserved.set(Hexagon::CS); return Reserved; } @@ -135,6 +176,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int Offset = HFI.getFrameIndexReference(MF, FI, BP); // Add the offset from the instruction. 
int RealOffset = Offset + MI.getOperand(FIOp+1).getImm(); + bool IsKill = false; unsigned Opc = MI.getOpcode(); switch (Opc) { @@ -149,20 +191,22 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, break; } - if (HII.isValidOffset(Opc, RealOffset)) { - MI.getOperand(FIOp).ChangeToRegister(BP, false); - MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset); - return; + if (!HII.isValidOffset(Opc, RealOffset)) { + // If the offset is not valid, calculate the address in a temporary + // register and use it with offset 0. + auto &MRI = MF.getRegInfo(); + unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + const DebugLoc &DL = MI.getDebugLoc(); + BuildMI(MB, II, DL, HII.get(Hexagon::A2_addi), TmpR) + .addReg(BP) + .addImm(RealOffset); + BP = TmpR; + RealOffset = 0; + IsKill = true; } -#ifndef NDEBUG - const Function *F = MF.getFunction(); - dbgs() << "In function "; - if (F) dbgs() << F->getName(); - else dbgs() << ""; - dbgs() << ", BB#" << MB.getNumber() << "\n" << MI; -#endif - llvm_unreachable("Unhandled instruction"); + MI.getOperand(FIOp).ChangeToRegister(BP, false, false, IsKill); + MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset); } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index db7e0f27815d..fc70679bc930 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -21,21 +21,6 @@ #define GET_REGINFO_HEADER #include "HexagonGenRegisterInfo.inc" -// -// We try not to hard code the reserved registers in our code, -// so the following two macros were defined. However, there -// are still a few places that R11 and R10 are hard wired. -// See below. If, in the future, we decided to change the reserved -// register. Don't forget changing the following places. -// -// 1. the "Defs" set of STriw_pred in HexagonInstrInfo.td -// 2. the "Defs" set of LDri_pred in HexagonInstrInfo.td -// 3. 
the definition of "IntRegs" in HexagonRegisterInfo.td -// 4. the definition of "DoubleRegs" in HexagonRegisterInfo.td -// -#define HEXAGON_RESERVED_REG_1 Hexagon::R10 -#define HEXAGON_RESERVED_REG_2 Hexagon::R11 - namespace llvm { class HexagonRegisterInfo : public HexagonGenRegisterInfo { public: @@ -76,7 +61,8 @@ public: unsigned getFrameRegister() const; unsigned getStackRegister() const; - const MCPhysReg *getCallerSavedRegs(const MachineFunction *MF) const; + const MCPhysReg *getCallerSavedRegs(const MachineFunction *MF, + const TargetRegisterClass *RC) const; unsigned getFirstCallerSavedNonParamReg() const; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td index 81629dc6d47f..4d0d411d73da 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -13,8 +13,8 @@ let Namespace = "Hexagon" in { - class HexagonReg num, string n, list alt = [], - list alias = []> : Register { + class HexagonReg num, string n, list alt = [], + list alias = []> : Register { field bits<5> Num; let Aliases = alias; let HWEncoding{4-0} = num; @@ -31,7 +31,8 @@ let Namespace = "Hexagon" in { // Registers are identified with 5-bit ID numbers. // Ri - 32-bit integer registers. - class Ri num, string n, list alt = []> : HexagonReg { + class Ri num, string n, list alt = []> : + HexagonReg { let Num = num; } @@ -42,8 +43,9 @@ let Namespace = "Hexagon" in { // Rd - 64-bit registers. - class Rd num, string n, list subregs> : - HexagonDoubleReg { + class Rd num, string n, list subregs, + list alt = []> : + HexagonDoubleReg { let Num = num; let SubRegs = subregs; } @@ -94,11 +96,11 @@ let Namespace = "Hexagon" in { // Aliases of the R* registers used to hold 64-bit int values (doubles). 
let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { - def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>; - def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>; - def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>; - def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>; - def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>; + def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>; + def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>; + def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>; + def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>; + def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>; def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>; def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>; def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>; @@ -109,7 +111,7 @@ let Namespace = "Hexagon" in { def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>; def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>; def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>; - def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>; + def D15 : Rd<30, "r31:30", [R30, R31], ["lr:fp"]>, DwarfRegNum<[62]>; } // Predicate registers. @@ -130,6 +132,11 @@ let Namespace = "Hexagon" in { // on the entire USR. def USR_OVF : Rc; + def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> { + let SubRegIndices = [subreg_overflow]; + let SubRegs = [USR_OVF]; + } + // Control registers. def SA0 : Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>; def LC0 : Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>; @@ -140,11 +147,12 @@ let Namespace = "Hexagon" in { def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; // future use def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>; def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>; - - def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> { - let SubRegIndices = [subreg_overflow]; - let SubRegs = [USR_OVF]; - } + // Define C8 separately and make it aliased with USR. + // The problem is that USR has subregisters (e.g. 
overflow). If USR was + // specified as a subregister of C9_8, it would imply that subreg_overflow + // and subreg_loreg can be composed, which leads to all kinds of issues + // with lane masks. + def C8 : Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>; def PC : Rc<9, "pc">, DwarfRegNum<[76]>; def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>; def GP : Rc<11, "gp">, DwarfRegNum<[78]>; @@ -159,7 +167,8 @@ let Namespace = "Hexagon" in { def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>; def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>; def C7_6 : Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>; - def C9_8 : Rcc<8, "c9:8", [USR, PC]>, DwarfRegNum<[74]>; + // Use C8 instead of USR as a subregister of C9_8. + def C9_8 : Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>; def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>; def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>; def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>; @@ -261,7 +270,13 @@ def VolatileV3 { R28, R31, P0, P1, P2, P3, M0, M1, - LC0, LC1, SA0, SA1, USR, USR_OVF]; + LC0, LC1, SA0, SA1, USR, USR_OVF, CS0, CS1, + V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, + V12, V13, V14, V15, V16, V17, V18, V19, V20, V21, + V22, V23, V24, V25, V26, V27, V28, V29, V30, V31, + W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, + W12, W13, W14, W15, + Q0, Q1, Q2, Q3]; } def PositiveHalfWord : PatLeaf<(i32 IntRegs:$a), diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td index 67af147b25b3..7416baab392c 100644 --- a/lib/Target/Hexagon/HexagonScheduleV4.td +++ b/lib/Target/Hexagon/HexagonScheduleV4.td @@ -49,7 +49,6 @@ def ALU32_3op_tc_1_SLOT0123 : InstrItinClass; def ALU32_3op_tc_2_SLOT0123 : InstrItinClass; def ALU32_ADDI_tc_1_SLOT0123 : InstrItinClass; def ALU64_tc_1_SLOT23 : InstrItinClass; -def ALU64_tc_1or2_SLOT23 : InstrItinClass; def ALU64_tc_2_SLOT23 : InstrItinClass; def ALU64_tc_2early_SLOT23 : 
InstrItinClass; def ALU64_tc_3x_SLOT23 : InstrItinClass; @@ -64,10 +63,9 @@ def J_tc_2early_SLOT2 : InstrItinClass; def LD_tc_ld_SLOT01 : InstrItinClass; def LD_tc_ld_SLOT0 : InstrItinClass; def LD_tc_3or4stall_SLOT0 : InstrItinClass; -def M_tc_1_SLOT23 : InstrItinClass; -def M_tc_1or2_SLOT23 : InstrItinClass; def M_tc_2_SLOT23 : InstrItinClass; def M_tc_3_SLOT23 : InstrItinClass; +def M_tc_1_SLOT23 : InstrItinClass; def M_tc_3x_SLOT23 : InstrItinClass; def M_tc_3or4x_SLOT23 : InstrItinClass; def ST_tc_st_SLOT01 : InstrItinClass; @@ -79,7 +77,6 @@ def S_2op_tc_2_SLOT23 : InstrItinClass; def S_2op_tc_2early_SLOT23 : InstrItinClass; def S_2op_tc_3or4x_SLOT23 : InstrItinClass; def S_3op_tc_1_SLOT23 : InstrItinClass; -def S_3op_tc_1or2_SLOT23 : InstrItinClass; def S_3op_tc_2_SLOT23 : InstrItinClass; def S_3op_tc_2early_SLOT23 : InstrItinClass; def S_3op_tc_3_SLOT23 : InstrItinClass; @@ -95,7 +92,6 @@ def J_tc_2early_SLOT0123 : InstrItinClass; def EXTENDER_tc_1_SLOT0123 : InstrItinClass; def S_3op_tc_3stall_SLOT23 : InstrItinClass; - def HexagonItinerariesV4 : ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [ // ALU32 @@ -114,7 +110,6 @@ def HexagonItinerariesV4 : // ALU64 InstrItinData]>, - InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -130,6 +125,7 @@ def HexagonItinerariesV4 : InstrItinData]>, // J InstrItinData]>, + InstrItinData]>, // JR InstrItinData]>, @@ -140,7 +136,6 @@ def HexagonItinerariesV4 : // M InstrItinData]>, - InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -159,11 +154,11 @@ def HexagonItinerariesV4 : InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, // SYS InstrItinData]>, @@ -188,6 +183,7 @@ def HexagonItinerariesV4 : InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData, @@ -199,6 +195,7 @@ def HexagonModelV4 : SchedMachineModel { let IssueWidth 
= 4; let Itineraries = HexagonItinerariesV4; let LoadLatency = 1; + let CompleteModel = 0; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonScheduleV55.td b/lib/Target/Hexagon/HexagonScheduleV55.td index d9ad25d4cd5a..b2a75f7200d7 100644 --- a/lib/Target/Hexagon/HexagonScheduleV55.td +++ b/lib/Target/Hexagon/HexagonScheduleV55.td @@ -31,131 +31,154 @@ def COPROC_VX_vtc_SLOT23 : InstrItinClass; def J_tc_3stall_SLOT2 : InstrItinClass; def MAPPING_tc_1_SLOT0123 : InstrItinClass; def M_tc_3stall_SLOT23 : InstrItinClass; -def SUBINSN_tc_1_SLOT01 : InstrItinClass; -def SUBINSN_tc_2early_SLOT0 : InstrItinClass; -def SUBINSN_tc_2early_SLOT01 : InstrItinClass; -def SUBINSN_tc_3stall_SLOT0 : InstrItinClass; -def SUBINSN_tc_ld_SLOT0 : InstrItinClass; -def SUBINSN_tc_ld_SLOT01 : InstrItinClass; -def SUBINSN_tc_st_SLOT01 : InstrItinClass; def HexagonItinerariesV55 : ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [ // ALU32 InstrItinData]>, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 1]>, InstrItinData]>, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1]>, InstrItinData]>, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 1]>, InstrItinData]>, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1]>, InstrItinData]>, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1]>, InstrItinData]>, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 1]>, // ALU64 - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [3, 1, 1]>, // CR -> System - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, + InstrItinData], [2, 1, 1]>, + InstrItinData], [2, 1, 1]>, + InstrItinData], [3, 1, 1]>, // Jump (conditional/unconditional/return etc) - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - 
InstrItinData]>, - InstrItinData]>, + InstrItinData], + [2, 1, 1, 1]>, + InstrItinData], + [3, 1, 1, 1]>, + InstrItinData], + [1, 1, 1, 1]>, + InstrItinData], + [2, 1, 1, 1]>, + InstrItinData], + [2, 1, 1, 1]>, + InstrItinData], [2, 1, 1, 1]>, // JR - InstrItinData]>, - InstrItinData]>, + InstrItinData], [2, 1, 1]>, + InstrItinData], [3, 1, 1]>, // Extender InstrItinData]>, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 1]>, // Load - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, + InstrItinData], + [2, 1]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1]>, // M - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [3, 1, 1]>, + InstrItinData], + [3, 1, 1]>, + InstrItinData], + [3, 1, 1]>, // Store - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // Subinsn - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], [2, 1, 1]>, + InstrItinData], [2, 1, 1]>, + InstrItinData], [1, 1, 1]>, // S - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [3, 1, 1]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [2, 1, 1]>, + InstrItinData], + [3, 1, 1]>, + InstrItinData], + [3, 1, 1]>, + InstrItinData], + [3, 1, 1]>, // New Value Compare Jump - InstrItinData]>, + InstrItinData], + [3, 1, 1, 1]>, // Mem ops - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, + InstrItinData], + [1, 1, 
1, 1]>, + InstrItinData], + [2, 1, 1, 1]>, + InstrItinData], + [1, 1, 1, 1]>, + InstrItinData], + [1, 1, 1, 1]>, + InstrItinData], + [3, 1, 1, 1]>, + InstrItinData], + [1, 1, 1, 1]>, // Endloop - InstrItinData]>, + InstrItinData], + [2]>, // Vector InstrItinData]>, + [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 1]>, InstrItinData]>, + [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1, 1]>, InstrItinData]>, + [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1, 1]>, InstrItinData]>, + [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], + [1, 1, 1, 1]>, // Misc - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], [1, 1, 1]>, + InstrItinData], + [1, 1, 1]>, + InstrItinData], + [1, 1, 1]>, InstrItinData, - InstrStage<1, [SLOT2, SLOT3]>]> - + InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 1]> ]>; def HexagonModelV55 : SchedMachineModel { @@ -163,6 +186,7 @@ def HexagonModelV55 : SchedMachineModel { let IssueWidth = 4; let Itineraries = HexagonItinerariesV55; let LoadLatency = 1; + let CompleteModel = 0; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonScheduleV60.td b/lib/Target/Hexagon/HexagonScheduleV60.td index 2ccff8242a47..dc2ce43b0579 100644 --- a/lib/Target/Hexagon/HexagonScheduleV60.td +++ b/lib/Target/Hexagon/HexagonScheduleV60.td @@ -167,16 +167,6 @@ def HexagonItinerariesV60 : InstrItinData]>, InstrItinData]>, - // Subinsn - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - // S InstrItinData]>, InstrItinData]>, @@ -303,6 +293,7 @@ def HexagonModelV60 : SchedMachineModel { let IssueWidth = 4; let Itineraries = HexagonItinerariesV60; let LoadLatency = 1; + let CompleteModel = 0; } //===----------------------------------------------------------------------===// diff --git 
a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp index 239dbda8f27b..00dfed754995 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -17,13 +17,10 @@ using namespace llvm; #define DEBUG_TYPE "hexagon-selectiondag-info" -SDValue -HexagonSelectionDAGInfo:: -EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, - SDValue Dst, SDValue Src, SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const { +SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast(Size); if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize) return SDValue(); @@ -55,7 +52,7 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, Type::getVoidTy(*DAG.getContext()), DAG.getTargetExternalSymbol( SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0) + std::move(Args)) .setDiscardResult(); std::pair CallResult = TLI.LowerCallTo(CLI); diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h index 80ac5d7bd9e2..6f2a42ce97f6 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -7,25 +7,23 @@ // //===----------------------------------------------------------------------===// // -// This file defines the Hexagon subclass for TargetSelectionDAGInfo. +// This file defines the Hexagon subclass for SelectionDAGTargetInfo. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSELECTIONDAGINFO_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONSELECTIONDAGINFO_H -#include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" namespace llvm { -class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo { +class HexagonSelectionDAGInfo : public SelectionDAGTargetInfo { public: - - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; }; diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index 10fe606985dd..5a94cce4ce57 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -21,7 +21,6 @@ #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" #include "HexagonTargetObjectFile.h" -#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -32,14 +31,11 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include using namespace llvm; @@ -61,6 +57,10 @@ class HexagonSplitConst32AndConst64 : public 
MachineFunctionPass { return "Hexagon Split Const32s and Const64s"; } bool runOnMachineFunction(MachineFunction &Fn) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } }; @@ -72,7 +72,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { const HexagonTargetObjectFile &TLOF = *static_cast( Fn.getTarget().getObjFileLowering()); - if (TLOF.IsSmallDataEnabled()) + if (TLOF.isSmallDataEnabled()) return true; const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); @@ -86,55 +86,56 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock::iterator MII = MBB->begin(); MachineBasicBlock::iterator MIE = MBB->end (); while (MII != MIE) { - MachineInstr *MI = MII; - int Opc = MI->getOpcode(); + MachineInstr &MI = *MII; + int Opc = MI.getOpcode(); if (Opc == Hexagon::CONST32_Int_Real && - MI->getOperand(1).isBlockAddress()) { - int DestReg = MI->getOperand(0).getReg(); - MachineOperand &Symbol = MI->getOperand (1); - - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::LO), DestReg).addOperand(Symbol); - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::HI), DestReg).addOperand(Symbol); + MI.getOperand(1).isBlockAddress()) { + int DestReg = MI.getOperand(0).getReg(); + MachineOperand &Symbol = MI.getOperand(1); + + BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::LO), DestReg) + .addOperand(Symbol); + BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::HI), DestReg) + .addOperand(Symbol); // MBB->erase returns the iterator to the next instruction, which is the // one we want to process next - MII = MBB->erase (MI); + MII = MBB->erase(&MI); continue; } else if (Opc == Hexagon::CONST32_Int_Real || Opc == Hexagon::CONST32_Float_Real) { - int DestReg = MI->getOperand(0).getReg(); + int DestReg = MI.getOperand(0).getReg(); // We have to convert 
an FP immediate into its corresponding integer // representation int64_t ImmValue; if (Opc == Hexagon::CONST32_Float_Real) { - APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF(); + APFloat Val = MI.getOperand(1).getFPImm()->getValueAPF(); ImmValue = *Val.bitcastToAPInt().getRawData(); } else - ImmValue = MI->getOperand(1).getImm(); + ImmValue = MI.getOperand(1).getImm(); - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::A2_tfrsi), DestReg).addImm(ImmValue); - MII = MBB->erase (MI); + BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::A2_tfrsi), + DestReg) + .addImm(ImmValue); + MII = MBB->erase(&MI); continue; } else if (Opc == Hexagon::CONST64_Int_Real || Opc == Hexagon::CONST64_Float_Real) { - int DestReg = MI->getOperand(0).getReg(); + int DestReg = MI.getOperand(0).getReg(); // We have to convert an FP immediate into its corresponding integer // representation int64_t ImmValue; if (Opc == Hexagon::CONST64_Float_Real) { - APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF(); + APFloat Val = MI.getOperand(1).getFPImm()->getValueAPF(); ImmValue = *Val.bitcastToAPInt().getRawData(); } else - ImmValue = MI->getOperand(1).getImm(); + ImmValue = MI.getOperand(1).getImm(); unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg); unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg); @@ -142,11 +143,13 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { int32_t LowWord = (ImmValue & 0xFFFFFFFF); int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF; - BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::A2_tfrsi), DestLo).addImm(LowWord); - BuildMI (*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::A2_tfrsi), DestHi).addImm(HighWord); - MII = MBB->erase (MI); + BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::A2_tfrsi), + DestLo) + .addImm(LowWord); + BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::A2_tfrsi), + DestHi) + .addImm(HighWord); + MII = MBB->erase(&MI); continue; } 
++MII; diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp index d4e95b0d0210..25b2affa2f0b 100644 --- a/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -12,13 +12,12 @@ #include "HexagonRegisterInfo.h" #include "HexagonTargetMachine.h" -#include "llvm/Pass.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -109,18 +108,6 @@ INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double", "Hexagon Split Double Registers", false, false) -static inline uint32_t getRegState(const MachineOperand &R) { - assert(R.isReg()); - return getDefRegState(R.isDef()) | - getImplRegState(R.isImplicit()) | - getKillRegState(R.isKill()) | - getDeadRegState(R.isDead()) | - getUndefRegState(R.isUndef()) | - getInternalReadRegState(R.isInternalRead()) | - (R.isDebug() ? RegState::Debug : 0); -} - - void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os, const USet &Part, const TargetRegisterInfo &TRI) { dbgs() << '{'; @@ -452,7 +439,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, MachineBasicBlock *TB = 0, *FB = 0; MachineBasicBlock *TmpLB = const_cast(LB); SmallVector Cond; - bool BadLB = TII->AnalyzeBranch(*TmpLB, TB, FB, Cond, false); + bool BadLB = TII->analyzeBranch(*TmpLB, TB, FB, Cond, false); // Only analyzable conditional branches. HII::AnalyzeBranch will put // the branch opcode as the first element of Cond, and the predicate // operand as the second. 
@@ -477,7 +464,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg()); int Mask = 0, Val = 0; - bool OkCI = TII->analyzeCompare(CmpI, CmpR1, CmpR2, Mask, Val); + bool OkCI = TII->analyzeCompare(*CmpI, CmpR1, CmpR2, Mask, Val); if (!OkCI) return; // Eliminate non-double input registers. @@ -655,7 +642,7 @@ void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI, MachineFunction &MF = *B.getParent(); for (auto &MO : MI->memoperands()) { const MachinePointerInfo &Ptr = MO->getPointerInfo(); - unsigned F = MO->getFlags(); + MachineMemOperand::Flags F = MO->getFlags(); int A = MO->getAlignment(); auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A); @@ -1164,6 +1151,9 @@ bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Splitting double registers in function: " << MF.getName() << '\n'); + if (skipFunction(*MF.getFunction())) + return false; + auto &ST = MF.getSubtarget(); TRI = ST.getRegisterInfo(); TII = ST.getInstrInfo(); diff --git a/lib/Target/Hexagon/HexagonStoreWidening.cpp b/lib/Target/Hexagon/HexagonStoreWidening.cpp index b5339ff4c0dc..54bc3cf6f6ff 100644 --- a/lib/Target/Hexagon/HexagonStoreWidening.cpp +++ b/lib/Target/Hexagon/HexagonStoreWidening.cpp @@ -594,6 +594,9 @@ bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { + if (skipFunction(*MFn.getFunction())) + return false; + MF = &MFn; auto &ST = MFn.getSubtarget(); TII = ST.getInstrInfo(); diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index aa0efd4f65e0..fb315a730f39 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -14,6 +14,8 @@ #include "HexagonSubtarget.h" #include "Hexagon.h" #include "HexagonRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include 
"llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include @@ -49,10 +51,24 @@ static cl::opt EnableHexagonHVX("enable-hexagon-hvx", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable Hexagon Vector eXtensions")); +static cl::opt EnableTCLatencySched("enable-tc-latency-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(false)); + +static cl::opt EnableDotCurSched("enable-cur-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(true), + cl::desc("Enable the scheduler to generate .cur")); + +static cl::opt EnableVecFrwdSched("enable-evec-frwd-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + static cl::opt DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon MI Scheduling")); +static cl::opt EnableSubregLiveness("hexagon-subreg-liveness", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable subregister liveness tracking for Hexagon")); + void HexagonSubtarget::initializeEnvironment() { UseMemOps = false; ModeIEEERndNear = false; @@ -115,6 +131,57 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, UseBSBScheduling = hasV60TOps() && EnableBSBSched; } + +void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) { + for (auto &SU : DAG->SUnits) { + if (!SU.isInstr()) + continue; + SmallVector Erase; + for (auto &D : SU.Preds) + if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF) + Erase.push_back(D); + for (auto &E : Erase) + SU.removePred(E); + } + + for (auto &SU : DAG->SUnits) { + // Update the latency of chain edges between v60 vector load or store + // instructions to be 1. These instructions cannot be scheduled in the + // same packet. 
+ MachineInstr *MI1 = SU.getInstr(); + auto *QII = static_cast(DAG->TII); + bool IsStoreMI1 = MI1->mayStore(); + bool IsLoadMI1 = MI1->mayLoad(); + if (!QII->isV60VectorInstruction(MI1) || !(IsStoreMI1 || IsLoadMI1)) + continue; + for (auto &SI : SU.Succs) { + if (SI.getKind() != SDep::Order || SI.getLatency() != 0) + continue; + MachineInstr *MI2 = SI.getSUnit()->getInstr(); + if (!QII->isV60VectorInstruction(MI2)) + continue; + if ((IsStoreMI1 && MI2->mayStore()) || (IsLoadMI1 && MI2->mayLoad())) { + SI.setLatency(1); + SU.setHeightDirty(); + // Change the dependence in the opposite direction too. + for (auto &PI : SI.getSUnit()->Preds) { + if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order) + continue; + PI.setLatency(1); + SI.getSUnit()->setDepthDirty(); + } + } + } + } +} + + +void HexagonSubtarget::getPostRAMutations( + std::vector> &Mutations) const { + Mutations.push_back(make_unique()); +} + + // Pin the vtable to this file. void HexagonSubtarget::anchor() {} @@ -123,3 +190,180 @@ bool HexagonSubtarget::enableMachineScheduler() const { return !DisableHexagonMISched; return true; } + +bool HexagonSubtarget::enableSubRegLiveness() const { + return EnableSubregLiveness; +} + +// This helper function is responsible for increasing the latency only. +void HexagonSubtarget::updateLatency(MachineInstr *SrcInst, + MachineInstr *DstInst, SDep &Dep) const { + if (!hasV60TOps()) + return; + + auto &QII = static_cast(*getInstrInfo()); + + if (EnableVecFrwdSched && QII.addLatencyToSchedule(SrcInst, DstInst)) { + // Vec frwd scheduling. + Dep.setLatency(Dep.getLatency() + 1); + } else if (useBSBScheduling() && + QII.isLateInstrFeedsEarlyInstr(SrcInst, DstInst)) { + // BSB scheduling. + Dep.setLatency(Dep.getLatency() + 1); + } else if (EnableTCLatencySched) { + // TClass latency scheduling. + // Check if SrcInst produces in 2C an operand of DstInst taken in stage 2B. 
+ if (QII.isTC1(SrcInst) || QII.isTC2(SrcInst)) + if (!QII.isTC1(DstInst) && !QII.isTC2(DstInst)) + Dep.setLatency(Dep.getLatency() + 1); + } +} + +/// If the SUnit has a zero latency edge, return the other SUnit. +static SUnit *getZeroLatency(SUnit *N, SmallVector &Deps) { + for (auto &I : Deps) + if (I.isAssignedRegDep() && I.getLatency() == 0 && + !I.getSUnit()->getInstr()->isPseudo()) + return I.getSUnit(); + return nullptr; +} + +/// Change the latency between the two SUnits. +void HexagonSubtarget::changeLatency(SUnit *Src, SmallVector &Deps, + SUnit *Dst, unsigned Lat) const { + MachineInstr *SrcI = Src->getInstr(); + for (auto &I : Deps) { + if (I.getSUnit() != Dst) + continue; + I.setLatency(Lat); + SUnit *UpdateDst = I.getSUnit(); + updateLatency(SrcI, UpdateDst->getInstr(), I); + // Update the latency of opposite edge too. + for (auto &PI : UpdateDst->Preds) { + if (PI.getSUnit() != Src || !PI.isAssignedRegDep()) + continue; + PI.setLatency(Lat); + updateLatency(SrcI, UpdateDst->getInstr(), PI); + } + } +} + +// Return true if these are the best two instructions to schedule +// together with a zero latency. Only one dependence should have a zero +// latency. If there are multiple choices, choose the best, and change +// ther others, if needed. +bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst, + const HexagonInstrInfo *TII) const { + MachineInstr *SrcInst = Src->getInstr(); + MachineInstr *DstInst = Dst->getInstr(); + + if (SrcInst->isPHI() || DstInst->isPHI()) + return false; + + // Check if the Dst instruction is the best candidate first. + SUnit *Best = nullptr; + SUnit *DstBest = nullptr; + SUnit *SrcBest = getZeroLatency(Dst, Dst->Preds); + if (SrcBest == nullptr || Src->NodeNum >= SrcBest->NodeNum) { + // Check that Src doesn't have a better candidate. 
+ DstBest = getZeroLatency(Src, Src->Succs); + if (DstBest == nullptr || Dst->NodeNum <= DstBest->NodeNum) + Best = Dst; + } + if (Best != Dst) + return false; + + // The caller frequents adds the same dependence twice. If so, then + // return true for this case too. + if (Src == SrcBest && Dst == DstBest) + return true; + + // Reassign the latency for the previous bests, which requires setting + // the dependence edge in both directions. + if (SrcBest != nullptr) + changeLatency(SrcBest, SrcBest->Succs, Dst, 1); + if (DstBest != nullptr) + changeLatency(Src, Src->Succs, DstBest, 1); + // If there is an edge from SrcBest to DstBst, then try to change that + // to 0 now. + if (SrcBest && DstBest) + changeLatency(SrcBest, SrcBest->Succs, DstBest, 0); + + return true; +} + +// Update the latency of a Phi when the Phi bridges two instructions that +// require a multi-cycle latency. +void HexagonSubtarget::changePhiLatency(MachineInstr *SrcInst, SUnit *Dst, + SDep &Dep) const { + if (!SrcInst->isPHI() || Dst->NumPreds == 0 || Dep.getLatency() != 0) + return; + + for (const SDep &PI : Dst->Preds) { + if (PI.getLatency() != 0) + continue; + Dep.setLatency(2); + break; + } +} + +/// \brief Perform target specific adjustments to the latency of a schedule +/// dependency. +void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, + SDep &Dep) const { + MachineInstr *SrcInst = Src->getInstr(); + MachineInstr *DstInst = Dst->getInstr(); + if (!Src->isInstr() || !Dst->isInstr()) + return; + + const HexagonInstrInfo *QII = static_cast(getInstrInfo()); + + // Instructions with .new operands have zero latency. + if (QII->canExecuteInBundle(SrcInst, DstInst) && + isBestZeroLatency(Src, Dst, QII)) { + Dep.setLatency(0); + return; + } + + if (!hasV60TOps()) + return; + + // Don't adjust the latency of post-increment part of the instruction. 
+ if (QII->isPostIncrement(SrcInst) && Dep.isAssignedRegDep()) { + if (SrcInst->mayStore()) + return; + if (Dep.getReg() != SrcInst->getOperand(0).getReg()) + return; + } else if (QII->isPostIncrement(DstInst) && Dep.getKind() == SDep::Anti) { + if (DstInst->mayStore()) + return; + if (Dep.getReg() != DstInst->getOperand(0).getReg()) + return; + } else if (QII->isPostIncrement(DstInst) && DstInst->mayStore() && + Dep.isAssignedRegDep()) { + MachineOperand &Op = DstInst->getOperand(DstInst->getNumOperands() - 1); + if (Op.isReg() && Dep.getReg() != Op.getReg()) + return; + } + + // Check if we need to change any the latency values when Phis are added. + if (useBSBScheduling() && SrcInst->isPHI()) { + changePhiLatency(SrcInst, Dst, Dep); + return; + } + + // If it's a REG_SEQUENCE, use its destination instruction to determine + // the correct latency. + if (DstInst->isRegSequence() && Dst->NumSuccs == 1) + DstInst = Dst->Succs[0].getSUnit()->getInstr(); + + // Try to schedule uses near definitions to generate .cur. + if (EnableDotCurSched && QII->isToBeScheduledASAP(SrcInst, DstInst) && + isBestZeroLatency(Src, Dst, QII)) { + Dep.setLatency(0); + return; + } + + updateLatency(SrcInst, DstInst, Dep); +} + diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index c7ae139c4346..9b40c130e622 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -18,7 +18,6 @@ #include "HexagonISelLowering.h" #include "HexagonInstrInfo.h" #include "HexagonSelectionDAGInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" #include @@ -47,6 +46,11 @@ public: /// default for V60. 
bool UseBSBScheduling; + class HexagonDAGMutation : public ScheduleDAGMutation { + public: + void apply(ScheduleDAGInstrs *DAG) override; + }; + private: std::string CPUString; HexagonInstrInfo InstrInfo; @@ -105,6 +109,11 @@ public: // compiler time and will be removed eventually anyway. bool enableMachineSchedDefaultSched() const override { return false; } + AntiDepBreakMode getAntiDepBreakMode() const override { return ANTIDEP_ALL; } + bool enablePostRAScheduler() const override { return true; } + + bool enableSubRegLiveness() const override; + const std::string &getCPUString () const { return CPUString; } // Threshold for small data section @@ -114,6 +123,24 @@ public: const HexagonArchEnum &getHexagonArchVersion() const { return HexagonArchVersion; } + + void getPostRAMutations( + std::vector> &Mutations) + const override; + + /// \brief Perform target specific adjustments to the latency of a schedule + /// dependency. + void adjustSchedDependency(SUnit *def, SUnit *use, SDep& dep) const override; + +private: + // Helper function responsible for increasing the latency only. + void updateLatency(MachineInstr *SrcInst, MachineInstr *DstInst, SDep &Dep) + const; + void changeLatency(SUnit *Src, SmallVector &Deps, SUnit *Dst, + unsigned Lat) const; + bool isBestZeroLatency(SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII) + const; + void changePhiLatency(MachineInstr *SrcInst, SUnit *Dst, SDep &Dep) const; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonSystemInst.td b/lib/Target/Hexagon/HexagonSystemInst.td index 784686a437ad..771498a40b99 100644 --- a/lib/Target/Hexagon/HexagonSystemInst.td +++ b/lib/Target/Hexagon/HexagonSystemInst.td @@ -111,3 +111,24 @@ def Y2_isync: JRInst <(outs), (ins), let Inst{9-0} = 0b0000000010; } +//===----------------------------------------------------------------------===// +// System/User instructions. 
+//===----------------------------------------------------------------------===// +// traps and pause +let hasSideEffects = 0, isSolo = 1 in +class J2_MISC_TRAP_PAUSE MajOp> + : JRInst + <(outs), (ins u8Imm:$u8), + #mnemonic#"(#$u8)"> { + bits<8> u8; + + let IClass = 0b0101; + let Inst{27-24} = 0b0100; + let Inst{23-22} = MajOp; + let Inst{12-8} = u8{7-3}; + let Inst{4-2} = u8{2-0}; + } +def J2_trap0 : J2_MISC_TRAP_PAUSE<"trap0", 0b00>; +def J2_trap1 : J2_MISC_TRAP_PAUSE<"trap1", 0b10>; +def J2_pause : J2_MISC_TRAP_PAUSE<"pause", 0b01>; + diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 34b03fb74cef..f964a6612f43 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -18,6 +18,7 @@ #include "HexagonTargetObjectFile.h" #include "HexagonTargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" @@ -33,6 +34,10 @@ static cl::opt EnableRDFOpt("rdf-opt", cl::Hidden, cl::ZeroOrMore, static cl::opt DisableHardwareLoops("disable-hexagon-hwloops", cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); +static cl::opt DisableAModeOpt("disable-hexagon-amodeopt", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon Addressing Mode Optimization")); + static cl::opt DisableHexagonCFGOpt("disable-hexagon-cfgopt", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon CFG Optimization")); @@ -72,6 +77,9 @@ static cl::opt EnableBitSimplify("hexagon-bit", cl::init(true), static cl::opt EnableLoopResched("hexagon-loop-resched", cl::init(true), cl::Hidden, cl::desc("Loop rescheduling")); +static cl::opt HexagonNoOpt("hexagon-noopt", cl::init(false), + cl::Hidden, cl::desc("Disable backend optimizations")); + /// HexagonTargetMachineModule - Note that this is used on hosts that 
/// cannot link in a library unless there are references into the /// library. In particular, it seems that it is not possible to get @@ -95,13 +103,13 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", namespace llvm { FunctionPass *createHexagonBitSimplify(); + FunctionPass *createHexagonBranchRelaxation(); FunctionPass *createHexagonCallFrameInformation(); FunctionPass *createHexagonCFGOptimizer(); FunctionPass *createHexagonCommonGEP(); FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonEarlyIfConversion(); FunctionPass *createHexagonExpandCondsets(); - FunctionPass *createHexagonExpandPredSpillCode(); FunctionPass *createHexagonFixupHwLoops(); FunctionPass *createHexagonGenExtract(); FunctionPass *createHexagonGenInsert(); @@ -113,6 +121,7 @@ namespace llvm { FunctionPass *createHexagonLoopRescheduling(); FunctionPass *createHexagonNewValueJump(); FunctionPass *createHexagonOptimizeSZextends(); + FunctionPass *createHexagonOptAddrMode(); FunctionPass *createHexagonPacketizer(); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonRDFOpt(); @@ -121,19 +130,27 @@ namespace llvm { FunctionPass *createHexagonStoreWidening(); } // end namespace llvm; -/// HexagonTargetMachine ctor - Create an ILP32 architecture model. -/// +static Reloc::Model getEffectiveRelocModel(Optional RM) { + if (!RM.hasValue()) + return Reloc::Static; + return *RM; +} -/// Hexagon_TODO: Do I need an aggregate alignment? -/// HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional RM, + CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-" - "i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-" - "n16:32", TT, CPU, FS, Options, RM, CM, OL), + // Specify the vector alignment explicitly. 
For v512x1, the calculated + // alignment would be 512*alignment(i1), which is 512 bytes, instead of + // the required minimum of 64 bytes. + : LLVMTargetMachine( + T, "e-m:e-p:32:32:32-a:0-n16:32-" + "i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-" + "v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048", + TT, CPU, FS, Options, getEffectiveRelocModel(RM), CM, + (HexagonNoOpt ? CodeGenOpt::None : OL)), TLOF(make_unique()) { initAsmInfo(); } @@ -178,15 +195,7 @@ namespace { class HexagonPassConfig : public TargetPassConfig { public: HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) { - bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None); - if (!NoOpt) { - if (EnableExpandCondsets) { - Pass *Exp = createHexagonExpandCondsets(); - insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp)); - } - } - } + : TargetPassConfig(TM, PM) {} HexagonTargetMachine &getHexagonTargetMachine() const { return getTM(); @@ -259,6 +268,10 @@ bool HexagonPassConfig::addInstSelector() { void HexagonPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { + if (EnableExpandCondsets) { + Pass *Exp = createHexagonExpandCondsets(); + insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp)); + } if (!DisableStoreWidening) addPass(createHexagonStoreWidening(), false); if (!DisableHardwareLoops) @@ -272,6 +285,8 @@ void HexagonPassConfig::addPostRegAlloc() { addPass(createHexagonRDFOpt()); if (!DisableHexagonCFGOpt) addPass(createHexagonCFGOptimizer(), false); + if (!DisableAModeOpt) + addPass(createHexagonOptAddrMode(), false); } } @@ -288,8 +303,7 @@ void HexagonPassConfig::addPreEmitPass() { if (!NoOpt) addPass(createHexagonNewValueJump(), false); - // Expand Spill code for predicate registers. - addPass(createHexagonExpandPredSpillCode(), false); + addPass(createHexagonBranchRelaxation(), false); // Create Packets. 
if (!NoOpt) { diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index 968814b3ea32..70835c0d4ac5 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -30,7 +30,7 @@ class HexagonTargetMachine : public LLVMTargetMachine { public: HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL); ~HexagonTargetMachine() override; const HexagonSubtarget *getSubtargetImpl(const Function &F) const override; diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index ccca62021f5b..82b437eb6a0c 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -1,4 +1,4 @@ -//===-- HexagonTargetObjectFile.cpp - Hexagon asm properties --------------===// +//===-- HexagonTargetObjectFile.cpp ---------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -10,10 +10,10 @@ // This file contains the declarations of the HexagonTargetAsmInfo properties. 
// //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "hexagon-sdata" -#include "HexagonTargetObjectFile.h" -#include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" +#include "HexagonTargetObjectFile.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -24,75 +24,368 @@ using namespace llvm; -static cl::opt SmallDataThreshold("hexagon-small-data-threshold", - cl::init(8), cl::Hidden, - cl::desc("The maximum size of an object in the sdata section")); +static cl::opt SmallDataThreshold("hexagon-small-data-threshold", + cl::init(8), cl::Hidden, + cl::desc("The maximum size of an object in the sdata section")); + +static cl::opt NoSmallDataSorting("mno-sort-sda", cl::init(false), + cl::Hidden, cl::desc("Disable small data sections sorting")); + +static cl::opt StaticsInSData("hexagon-statics-in-small-data", + cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Allow static variables in .sdata")); + +static cl::opt TraceGVPlacement("trace-gv-placement", + cl::Hidden, cl::init(false), + cl::desc("Trace global value placement")); + +// TraceGVPlacement controls messages for all builds. For builds with assertions +// (debug or release), messages are also controlled by the usual debug flags +// (e.g. -debug and -debug-only=globallayout) +#define TRACE_TO(s, X) s << X +#ifdef NDEBUG +#define TRACE(X) do { if (TraceGVPlacement) { TRACE_TO(errs(), X); } } while (0) +#else +#define TRACE(X) \ + do { \ + if (TraceGVPlacement) { TRACE_TO(errs(), X); } \ + else { DEBUG( TRACE_TO(dbgs(), X) ); } \ + } while (0) +#endif + +// Returns true if the section name is such that the symbol will be put +// in a small data section. +// For instance, global variables with section attributes such as ".sdata" +// ".sdata.*", ".sbss", and ".sbss.*" will go into small data. +static bool isSmallDataSection(StringRef Sec) { + // sectionName is either ".sdata" or ".sbss". 
Looking for an exact match + // obviates the need for checks for section names such as ".sdatafoo". + if (Sec.equals(".sdata") || Sec.equals(".sbss") || Sec.equals(".scommon")) + return true; + // If either ".sdata." or ".sbss." is a substring of the section name + // then put the symbol in small data. + return Sec.find(".sdata.") != StringRef::npos || + Sec.find(".sbss.") != StringRef::npos || + Sec.find(".scommon.") != StringRef::npos; +} + + +static const char *getSectionSuffixForSize(unsigned Size) { + switch (Size) { + default: + return ""; + case 1: + return ".1"; + case 2: + return ".2"; + case 4: + return ".4"; + case 8: + return ".8"; + } +} void HexagonTargetObjectFile::Initialize(MCContext &Ctx, - const TargetMachine &TM) { + const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(TM.Options.UseInitArray); - SmallDataSection = getContext().getELFSection( - ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); - SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS, - ELF::SHF_WRITE | ELF::SHF_ALLOC); + SmallDataSection = + getContext().getELFSection(".sdata", ELF::SHT_PROGBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC | + ELF::SHF_HEX_GPREL); + SmallBSSSection = + getContext().getELFSection(".sbss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC | + ELF::SHF_HEX_GPREL); } -// sdata/sbss support taken largely from the MIPS Backend. -static bool IsInSmallSection(uint64_t Size) { - return Size > 0 && Size <= (uint64_t)SmallDataThreshold; -} -bool HexagonTargetObjectFile::IsSmallDataEnabled () const { - return SmallDataThreshold > 0; +MCSection *HexagonTargetObjectFile::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + TRACE("[SelectSectionForGlobal] GV(" << GV->getName() << ") "); + TRACE("input section(" << GV->getSection() << ") "); + + TRACE((GV->hasPrivateLinkage() ? "private_linkage " : "") + << (GV->hasLocalLinkage() ? 
"local_linkage " : "") + << (GV->hasInternalLinkage() ? "internal " : "") + << (GV->hasExternalLinkage() ? "external " : "") + << (GV->hasCommonLinkage() ? "common_linkage " : "") + << (GV->hasCommonLinkage() ? "common " : "" ) + << (Kind.isCommon() ? "kind_common " : "" ) + << (Kind.isBSS() ? "kind_bss " : "" ) + << (Kind.isBSSLocal() ? "kind_bss_local " : "" )); + + if (isGlobalInSmallSection(GV, TM)) + return selectSmallSectionForGlobal(GV, Kind, Mang, TM); + + if (Kind.isCommon()) { + // This is purely for LTO+Linker Script because commons don't really have a + // section. However, the BitcodeSectionWriter pass will query for the + // sections of commons (and the linker expects us to know their section) so + // we'll return one here. + return BSSSection; + } + + TRACE("default_ELF_section\n"); + // Otherwise, we work the same as ELF. + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, + Mang, TM); } -/// IsGlobalInSmallSection - Return true if this global value should be -/// placed into small data/bss section. -bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV, - const TargetMachine &TM) const { - // If the primary definition of this global value is outside the current - // translation unit or the global value is available for inspection but not - // emission, then do nothing. - if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) - return false; - // Otherwise, Check if GV should be in sdata/sbss, when normally it would end - // up in getKindForGlobal(GV, TM). - return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM)); +MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + TRACE("[getExplicitSectionGlobal] GV(" << GV->getName() << ") from(" + << GV->getSection() << ") "); + TRACE((GV->hasPrivateLinkage() ? "private_linkage " : "") + << (GV->hasLocalLinkage() ? 
"local_linkage " : "") + << (GV->hasInternalLinkage() ? "internal " : "") + << (GV->hasExternalLinkage() ? "external " : "") + << (GV->hasCommonLinkage() ? "common_linkage " : "") + << (GV->hasCommonLinkage() ? "common " : "" ) + << (Kind.isCommon() ? "kind_common " : "" ) + << (Kind.isBSS() ? "kind_bss " : "" ) + << (Kind.isBSSLocal() ? "kind_bss_local " : "" )); + + if (GV->hasSection()) { + StringRef Section = GV->getSection(); + if (Section.find(".access.text.group") != StringRef::npos) + return getContext().getELFSection(GV->getSection(), ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_EXECINSTR); + if (Section.find(".access.data.group") != StringRef::npos) + return getContext().getELFSection(GV->getSection(), ELF::SHT_PROGBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC); + } + + if (isGlobalInSmallSection(GV, TM)) + return selectSmallSectionForGlobal(GV, Kind, Mang, TM); + + // Otherwise, we work the same as ELF. + TRACE("default_ELF_section\n"); + return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GV, Kind, + Mang, TM); } -/// IsGlobalInSmallSection - Return true if this global value should be -/// placed into small data/bss section. -bool HexagonTargetObjectFile:: -IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, - SectionKind Kind) const { + +/// Return true if this global value should be placed into small data/bss +/// section. +bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const { // Only global variables, not functions. 
- const GlobalVariable *GVA = dyn_cast(GV); - if (!GVA) + DEBUG(dbgs() << "Checking if value is in small-data, -G" + << SmallDataThreshold << ": \"" << GV->getName() << "\": "); + const GlobalVariable *GVar = dyn_cast(GV); + if (!GVar) { + DEBUG(dbgs() << "no, not a global variable\n"); return false; + } - if (Kind.isBSS() || Kind.isData() || Kind.isCommon()) { - Type *Ty = GV->getType()->getElementType(); - return IsInSmallSection( - GV->getParent()->getDataLayout().getTypeAllocSize(Ty)); + // Globals with external linkage that have an original section set must be + // emitted to that section, regardless of whether we would put them into + // small data or not. This is how we can support mixing -G0/-G8 in LTO. + if (GVar->hasSection()) { + bool IsSmall = isSmallDataSection(GVar->getSection()); + DEBUG(dbgs() << (IsSmall ? "yes" : "no") << ", has section: " + << GVar->getSection() << '\n'); + return IsSmall; } - return false; + if (GVar->isConstant()) { + DEBUG(dbgs() << "no, is a constant\n"); + return false; + } + + bool IsLocal = GVar->hasLocalLinkage(); + if (!StaticsInSData && IsLocal) { + DEBUG(dbgs() << "no, is static\n"); + return false; + } + + Type *GType = GVar->getType(); + if (PointerType *PT = dyn_cast(GType)) + GType = PT->getElementType(); + + if (isa(GType)) { + DEBUG(dbgs() << "no, is an array\n"); + return false; + } + + // If the type is a struct with no body provided, treat is conservatively. + // There cannot be actual definitions of object of such a type in this CU + // (only references), so assuming that they are not in sdata is safe. If + // these objects end up in the sdata, the references will still be valid. 
+ if (StructType *ST = dyn_cast(GType)) { + if (ST->isOpaque()) { + DEBUG(dbgs() << "no, has opaque type\n"); + return false; + } + } + + unsigned Size = GVar->getParent()->getDataLayout().getTypeAllocSize(GType); + if (Size == 0) { + DEBUG(dbgs() << "no, has size 0\n"); + return false; + } + if (Size > SmallDataThreshold) { + DEBUG(dbgs() << "no, size exceeds sdata threshold: " << Size << '\n'); + return false; + } + + DEBUG(dbgs() << "yes\n"); + return true; +} + + +bool HexagonTargetObjectFile::isSmallDataEnabled() const { + return SmallDataThreshold > 0; +} + + +unsigned HexagonTargetObjectFile::getSmallDataSize() const { + return SmallDataThreshold; +} + + +/// Descends any type down to "elementary" components, +/// discovering the smallest addressable one. +/// If zero is returned, declaration will not be modified. +unsigned HexagonTargetObjectFile::getSmallestAddressableSize(const Type *Ty, + const GlobalValue *GV, const TargetMachine &TM) const { + // Assign the smallest element access size to the highest + // value which assembler can handle. + unsigned SmallestElement = 8; + + if (!Ty) + return 0; + switch (Ty->getTypeID()) { + case Type::StructTyID: { + const StructType *STy = cast(Ty); + for (auto &E : STy->elements()) { + unsigned AtomicSize = getSmallestAddressableSize(E, GV, TM); + if (AtomicSize < SmallestElement) + SmallestElement = AtomicSize; + } + return (STy->getNumElements() == 0) ? 0 : SmallestElement; + } + case Type::ArrayTyID: { + const ArrayType *ATy = cast(Ty); + return getSmallestAddressableSize(ATy->getElementType(), GV, TM); + } + case Type::VectorTyID: { + const VectorType *PTy = cast(Ty); + return getSmallestAddressableSize(PTy->getElementType(), GV, TM); + } + case Type::PointerTyID: + case Type::HalfTyID: + case Type::FloatTyID: + case Type::DoubleTyID: + case Type::IntegerTyID: { + const DataLayout &DL = GV->getParent()->getDataLayout(); + // It is unfortunate that DL's function take non-const Type*. 
+ return DL.getTypeAllocSize(const_cast(Ty)); + } + case Type::FunctionTyID: + case Type::VoidTyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + case Type::MetadataTyID: + case Type::X86_MMXTyID: + case Type::TokenTyID: + return 0; + } + + return 0; } -MCSection * -HexagonTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, - SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { +MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + const Type *GTy = GV->getType()->getElementType(); + unsigned Size = getSmallestAddressableSize(GTy, GV, TM); + + // If we have -ffunction-section or -fdata-section then we should emit the + // global value to a unique section specifically for it... even for sdata. + bool EmitUniquedSection = TM.getDataSections(); + + TRACE("Small data. Size(" << Size << ")"); // Handle Small Section classification here. - if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) - return SmallBSSSection; - if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind)) - return SmallDataSection; + if (Kind.isBSS() || Kind.isBSSLocal()) { + // If -mno-sort-sda is not set, find out smallest accessible entity in + // declaration and add it to the section name string. + // Note. It does not track the actual usage of the value, only its de- + // claration. Also, compiler adds explicit pad fields to some struct + // declarations - they are currently counted towards smallest addres- + // sable entity. 
+ if (NoSmallDataSorting) { + TRACE(" default sbss\n"); + return SmallBSSSection; + } + + StringRef Prefix(".sbss"); + SmallString<128> Name(Prefix); + Name.append(getSectionSuffixForSize(Size)); + + if (EmitUniquedSection) { + Name.append("."); + Name.append(GV->getName()); + } + TRACE(" unique sbss(" << Name << ")\n"); + return getContext().getELFSection(Name.str(), ELF::SHT_NOBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_HEX_GPREL); + } + + if (Kind.isCommon()) { + // This is purely for LTO+Linker Script because commons don't really have a + // section. However, the BitcodeSectionWriter pass will query for the + // sections of commons (and the linker expects us to know their section) so + // we'll return one here. + if (NoSmallDataSorting) + return BSSSection; + + Twine Name = Twine(".scommon") + getSectionSuffixForSize(Size); + TRACE(" small COMMON (" << Name << ")\n"); + + return getContext().getELFSection(Name.str(), ELF::SHT_NOBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC | + ELF::SHF_HEX_GPREL); + } + + // We could have changed sdata object to a constant... in this + // case the Kind could be wrong for it. + if (Kind.isMergeableConst()) { + TRACE(" const_object_as_data "); + const GlobalVariable *GVar = dyn_cast(GV); + if (GVar->hasSection() && isSmallDataSection(GVar->getSection())) + Kind = SectionKind::getData(); + } + + if (Kind.isData()) { + if (NoSmallDataSorting) { + TRACE(" default sdata\n"); + return SmallDataSection; + } + + StringRef Prefix(".sdata"); + SmallString<128> Name(Prefix); + Name.append(getSectionSuffixForSize(Size)); + + if (EmitUniquedSection) { + Name.append("."); + Name.append(GV->getName()); + } + TRACE(" unique sdata(" << Name << ")\n"); + return getContext().getELFSection(Name.str(), ELF::SHT_PROGBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_HEX_GPREL); + } + TRACE("default ELF section\n"); // Otherwise, we work the same as ELF. 
- return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM); + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, + Mang, TM); } diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h index da0eeeb3fd28..cbc00da88c58 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.h +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h @@ -1,4 +1,4 @@ -//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===// +//===-- HexagonTargetObjectFile.h -----------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -16,24 +16,31 @@ namespace llvm { class HexagonTargetObjectFile : public TargetLoweringObjectFileELF { - MCSectionELF *SmallDataSection; - MCSectionELF *SmallBSSSection; - public: void Initialize(MCContext &Ctx, const TargetMachine &TM) override; - /// IsGlobalInSmallSection - Return true if this global address should be - /// placed into small data/bss section. 
- bool IsGlobalInSmallSection(const GlobalValue *GV, - const TargetMachine &TM, - SectionKind Kind) const; - bool IsGlobalInSmallSection(const GlobalValue *GV, - const TargetMachine &TM) const; - - bool IsSmallDataEnabled () const; MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler &Mang, - const TargetMachine &TM) const override; + Mangler &Mang, const TargetMachine &TM) const override; + + MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, const TargetMachine &TM) const override; + + bool isGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) + const; + + bool isSmallDataEnabled() const; + + unsigned getSmallDataSize() const; + + private: + MCSectionELF *SmallDataSection; + MCSectionELF *SmallBSSSection; + + unsigned getSmallestAddressableSize(const Type *Ty, const GlobalValue *GV, + const TargetMachine &TM) const; + + MCSection *selectSmallSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const; }; } // namespace llvm diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 81850548bb6e..d326b9471315 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -29,8 +29,6 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include -#include using namespace llvm; @@ -81,6 +79,10 @@ namespace { return "Hexagon Packetizer"; } bool runOnMachineFunction(MachineFunction &Fn) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } private: const HexagonInstrInfo *HII; @@ -106,16 +108,19 @@ HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) { HII = 
MF.getSubtarget().getInstrInfo(); HRI = MF.getSubtarget().getRegisterInfo(); + + addMutation(make_unique()); } // Check if FirstI modifies a register that SecondI reads. -static bool hasWriteToReadDep(const MachineInstr *FirstI, - const MachineInstr *SecondI, const TargetRegisterInfo *TRI) { - for (auto &MO : FirstI->operands()) { +static bool hasWriteToReadDep(const MachineInstr &FirstI, + const MachineInstr &SecondI, + const TargetRegisterInfo *TRI) { + for (auto &MO : FirstI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; unsigned R = MO.getReg(); - if (SecondI->readsRegister(R, TRI)) + if (SecondI.readsRegister(R, TRI)) return true; } return false; @@ -146,7 +151,7 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, B.splice(InsertPt, &B, MI); // Get the size of the bundle without asserting. - MachineBasicBlock::const_instr_iterator I(BundleIt); + MachineBasicBlock::const_instr_iterator I = BundleIt.getInstrIterator(); MachineBasicBlock::const_instr_iterator E = B.instr_end(); unsigned Size = 0; for (++I; I != E && I->isBundledWithPred(); ++I) @@ -168,7 +173,7 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { - if (DisablePacketizer) + if (DisablePacketizer || skipFunction(*MF.getFunction())) return false; HII = MF.getSubtarget().getInstrInfo(); @@ -216,12 +221,12 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { // First the first non-boundary starting from the end of the last // scheduling region. MachineBasicBlock::iterator RB = Begin; - while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF)) + while (RB != End && HII->isSchedulingBoundary(*RB, &MB, MF)) ++RB; // First the first boundary starting from the beginning of the new // region. 
MachineBasicBlock::iterator RE = RB; - while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF)) + while (RE != End && !HII->isSchedulingBoundary(*RE, &MB, MF)) ++RE; // Add the scheduling boundary if it's not block end. if (RE != End) @@ -254,9 +259,9 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt() { // return true, otherwise, return false. bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc()); - bool Avail = ResourceTracker->canReserveResources(ExtMI); + bool Avail = ResourceTracker->canReserveResources(*ExtMI); if (Reserve && Avail) - ResourceTracker->reserveResources(ExtMI); + ResourceTracker->reserveResources(*ExtMI); MF.DeleteMachineInstr(ExtMI); return Avail; } @@ -365,7 +370,7 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI, const TargetRegisterClass *RC) { if (!HII->isV60VectorInstruction(MI)) return false; - if (!HII->isV60VectorInstruction(MII)) + if (!HII->isV60VectorInstruction(&*MII)) return false; // Already a dot new instruction. @@ -383,11 +388,14 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI, DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; MI->dump(); dbgs() << "in packet\n";); - MachineInstr *MJ = MII; - DEBUG(dbgs() << "Checking CUR against "; MJ->dump();); + MachineInstr &MJ = *MII; + DEBUG({ + dbgs() << "Checking CUR against "; + MJ.dump(); + }); unsigned DestReg = MI->getOperand(0).getReg(); bool FoundMatch = false; - for (auto &MO : MJ->operands()) + for (auto &MO : MJ.operands()) if (MO.isReg() && MO.getReg() == DestReg) FoundMatch = true; if (!FoundMatch) @@ -436,7 +444,7 @@ enum PredicateKind { /// Returns true if an instruction is predicated on p0 and false if it's /// predicated on !p0. 
-static PredicateKind getPredicateSense(const MachineInstr *MI, +static PredicateKind getPredicateSense(const MachineInstr &MI, const HexagonInstrInfo *HII) { if (!HII->isPredicated(MI)) return PK_Unknown; @@ -570,8 +578,8 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, // If the source that feeds the store is predicated, new value store must // also be predicated. - if (HII->isPredicated(PacketMI)) { - if (!HII->isPredicated(MI)) + if (HII->isPredicated(*PacketMI)) { + if (!HII->isPredicated(*MI)) return false; // Check to make sure that they both will have their predicates @@ -613,8 +621,8 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, // 3) Both new-value register producer and user should have same predicate // sense, i.e, either both should be negated or both should be non-negated. if (predRegNumDst != predRegNumSrc || - HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) || - getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII)) + HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) || + getPredicateSense(*MI, HII) != getPredicateSense(*PacketMI, HII)) return false; } @@ -762,7 +770,7 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI, int NewOpcode = HII->getDotNewOp(MI); const MCInstrDesc &D = HII->get(NewOpcode); MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc()); - bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI); + bool ResourcesAvailable = ResourceTracker->canReserveResources(*NewMI); MF.DeleteMachineInstr(NewMI); if (!ResourcesAvailable) return false; @@ -793,7 +801,7 @@ bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI, for (auto I : CurrentPacketMIs) { // We only care for dependencies to predicated instructions - if (!HII->isPredicated(I)) + if (!HII->isPredicated(*I)) continue; // Scheduling Unit for current insn in the packet @@ -817,13 +825,13 @@ bool 
HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI, /// Gets the predicate register of a predicated instruction. -static unsigned getPredicatedRegister(MachineInstr *MI, +static unsigned getPredicatedRegister(MachineInstr &MI, const HexagonInstrInfo *QII) { /// We use the following rule: The first predicate register that is a use is /// the predicate register of a predicated instruction. assert(QII->isPredicated(MI) && "Must be predicated instruction"); - for (auto &Op : MI->operands()) { + for (auto &Op : MI.operands()) { if (Op.isReg() && Op.getReg() && Op.isUse() && Hexagon::PredRegsRegClass.contains(Op.getReg())) return Op.getReg(); @@ -835,8 +843,8 @@ static unsigned getPredicatedRegister(MachineInstr *MI, // Given two predicated instructions, this function detects whether // the predicates are complements. -bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1, - MachineInstr *MI2) { +bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1, + MachineInstr &MI2) { // If we don't know the predicate sense of the instructions bail out early, we // need it later. if (getPredicateSense(MI1, HII) == PK_Unknown || @@ -844,7 +852,7 @@ bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1, return false; // Scheduling unit for candidate. - SUnit *SU = MIToSUnit[MI1]; + SUnit *SU = MIToSUnit[&MI1]; // One corner case deals with the following scenario: // Trying to add @@ -898,7 +906,7 @@ bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1, Hexagon::PredRegsRegClass.contains(PReg1) && Hexagon::PredRegsRegClass.contains(PReg2) && getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) && - HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2); + HII->isDotNewInst(&MI1) == HII->isDotNewInst(&MI2); } // Initialize packetizer flags. @@ -911,31 +919,31 @@ void HexagonPacketizerList::initPacketizerState() { } // Ignore bundling of pseudo instructions. 
-bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI, - const MachineBasicBlock*) { - if (MI->isDebugValue()) +bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr &MI, + const MachineBasicBlock *) { + if (MI.isDebugValue()) return true; - if (MI->isCFIInstruction()) + if (MI.isCFIInstruction()) return false; // We must print out inline assembly. - if (MI->isInlineAsm()) + if (MI.isInlineAsm()) return false; - if (MI->isImplicitDef()) + if (MI.isImplicitDef()) return false; // We check if MI has any functional units mapped to it. If it doesn't, // we ignore the instruction. - const MCInstrDesc& TID = MI->getDesc(); + const MCInstrDesc& TID = MI.getDesc(); auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass()); unsigned FuncUnits = IS->getUnits(); return !FuncUnits; } -bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) { - if (MI->isEHLabel() || MI->isCFIInstruction()) +bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) { + if (MI.isEHLabel() || MI.isCFIInstruction()) return true; // Consider inline asm to not be a solo instruction by default. @@ -943,19 +951,19 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) { // removed, and placed outside of the packet (before or after, depending // on dependencies). This is to reduce the impact of inline asm as a // "packet splitting" instruction. - if (MI->isInlineAsm() && !ScheduleInlineAsm) + if (MI.isInlineAsm() && !ScheduleInlineAsm) return true; // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints: // trap, pause, barrier, icinva, isync, and syncht are solo instructions. // They must not be grouped with other instructions in a packet. 
- if (isSchedBarrier(MI)) + if (isSchedBarrier(&MI)) return true; - if (HII->isSolo(MI)) + if (HII->isSolo(&MI)) return true; - if (MI->getOpcode() == Hexagon::A2_nop) + if (MI.getOpcode() == Hexagon::A2_nop) return true; return false; @@ -1016,7 +1024,7 @@ void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { // after the bundle (to preserve the bundle semantics). bool InsertBeforeBundle; if (MI->isInlineAsm()) - InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI); + InsertBeforeBundle = !hasWriteToReadDep(*MI, *BundleIt, HRI); else if (MI->isDebugValue()) InsertBeforeBundle = true; else @@ -1045,7 +1053,7 @@ bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I, // defining the same (dead) register. if (I->isCall() || J->isCall()) return false; - if (HII->isPredicated(I) || HII->isPredicated(J)) + if (HII->isPredicated(*I) || HII->isPredicated(*J)) return false; BitVector DeadDefs(Hexagon::NUM_TARGET_REGS); @@ -1085,7 +1093,7 @@ bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I, auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool { if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI)) return true; - if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI)) + if (HII->isPredicated(*MI) && HII->isPredicatedNew(*MI) && HII->isJumpR(MI)) return true; return false; }; @@ -1139,7 +1147,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); // Solo instructions cannot go in the packet. - assert(!isSoloInstruction(I) && "Unexpected solo instr!"); + assert(!isSoloInstruction(*I) && "Unexpected solo instr!"); if (cannotCoexist(I, J)) return false; @@ -1158,12 +1166,12 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // If an instruction feeds new value jump, glue it. 
MachineBasicBlock::iterator NextMII = I; ++NextMII; - if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) { - MachineInstr *NextMI = NextMII; + if (NextMII != I->getParent()->end() && HII->isNewValueJump(&*NextMII)) { + MachineInstr &NextMI = *NextMII; bool secondRegMatch = false; - const MachineOperand &NOp0 = NextMI->getOperand(0); - const MachineOperand &NOp1 = NextMI->getOperand(1); + const MachineOperand &NOp0 = NextMI.getOperand(0); + const MachineOperand &NOp1 = NextMI.getOperand(1); if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg()) secondRegMatch = true; @@ -1242,7 +1250,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { RC = HRI->getMinimalPhysRegClass(DepReg); } - if (I->isCall() || I->isReturn()) { + if (I->isCall() || I->isReturn() || HII->isTailCall(I)) { if (!isRegDependence(DepType)) continue; if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg())) @@ -1275,8 +1283,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // For predicated instructions, if the predicates are complements then // there can be no dependence. - if (HII->isPredicated(I) && HII->isPredicated(J) && - arePredicatesComplements(I, J)) { + if (HII->isPredicated(*I) && HII->isPredicated(*J) && + arePredicatesComplements(*I, *J)) { // Not always safe to do this translation. // DAG Builder attempts to reduce dependence edges using transitive // nature of dependencies. Here is an example: @@ -1400,8 +1408,30 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { } } - // Skip over anti-dependences. Two instructions that are anti-dependent - // can share a packet. + // There are certain anti-dependencies that cannot be ignored. + // Specifically: + // J2_call ... %R0 ; SUJ + // R0 = ... ; SUI + // Those cannot be packetized together, since the call will observe + // the effect of the assignment to R0. 
+ if (DepType == SDep::Anti && J->isCall()) { + // Check if I defines any volatile register. We should also check + // registers that the call may read, but these happen to be a + // subset of the volatile register set. + for (const MCPhysReg *P = J->getDesc().ImplicitDefs; P && *P; ++P) { + if (!I->modifiesRegister(*P, HRI)) + continue; + FoundSequentialDependence = true; + break; + } + } + + // Skip over remaining anti-dependences. Two instructions that are + // anti-dependent can share a packet, since in most such cases all + // operands are read before any modifications take place. + // The exceptions are branch and call instructions, since they are + // executed after all other instructions have completed (at least + // conceptually). if (DepType != SDep::Anti) { FoundSequentialDependence = true; break; @@ -1444,26 +1474,25 @@ bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { return false; } - MachineBasicBlock::iterator -HexagonPacketizerList::addToPacket(MachineInstr *MI) { +HexagonPacketizerList::addToPacket(MachineInstr &MI) { MachineBasicBlock::iterator MII = MI; - MachineBasicBlock *MBB = MI->getParent(); - if (MI->isImplicitDef()) { - unsigned R = MI->getOperand(0).getReg(); + MachineBasicBlock *MBB = MI.getParent(); + if (MI.isImplicitDef()) { + unsigned R = MI.getOperand(0).getReg(); if (Hexagon::IntRegsRegClass.contains(R)) { MCSuperRegIterator S(R, HRI, false); - MI->addOperand(MachineOperand::CreateReg(*S, true, true)); + MI.addOperand(MachineOperand::CreateReg(*S, true, true)); } return MII; } assert(ResourceTracker->canReserveResources(MI)); - bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); + bool ExtMI = HII->isExtended(&MI) || HII->isConstExtended(&MI); bool Good = true; if (GlueToNewValueJump) { - MachineInstr *NvjMI = ++MII; + MachineInstr &NvjMI = *++MII; // We need to put both instructions in the same packet: MI and NvjMI. // Either of them can require a constant extender. 
Try to add both to // the current packet, and if that fails, end the packet and start a @@ -1472,7 +1501,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) { if (ExtMI) Good = tryAllocateResourcesForConstExt(true); - bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI); + bool ExtNvjMI = HII->isExtended(&NvjMI) || HII->isConstExtended(&NvjMI); if (Good) { if (ResourceTracker->canReserveResources(NvjMI)) ResourceTracker->reserveResources(NvjMI); @@ -1497,8 +1526,8 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) { reserveResourcesForConstExt(); } } - CurrentPacketMIs.push_back(MI); - CurrentPacketMIs.push_back(NvjMI); + CurrentPacketMIs.push_back(&MI); + CurrentPacketMIs.push_back(&NvjMI); return MII; } @@ -1506,23 +1535,23 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) { if (ExtMI && !tryAllocateResourcesForConstExt(true)) { endPacket(MBB, MI); if (PromotedToDotNew) - demoteToDotOld(MI); + demoteToDotOld(&MI); ResourceTracker->reserveResources(MI); reserveResourcesForConstExt(); } - CurrentPacketMIs.push_back(MI); + CurrentPacketMIs.push_back(&MI); return MII; } void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, - MachineInstr *MI) { + MachineBasicBlock::iterator MI) { OldPacketMIs = CurrentPacketMIs; VLIWPacketizerList::endPacket(MBB, MI); } -bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) { - return !producesStall(MI); +bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) { + return !producesStall(&MI); } @@ -1598,4 +1627,3 @@ bool HexagonPacketizerList::producesStall(const MachineInstr *I) { FunctionPass *llvm::createHexagonPacketizer() { return new HexagonPacketizer(); } - diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h index 960cf6ca5bbc..3f8ed5af3540 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -50,12 +50,12 @@ public: void initPacketizerState() override; 
// ignorePseudoInstruction - Ignore bundling of pseudo instructions. - bool ignorePseudoInstruction(const MachineInstr *MI, + bool ignorePseudoInstruction(const MachineInstr &MI, const MachineBasicBlock *MBB) override; // isSoloInstruction - return true if instruction MI can not be packetized // with any other instruction, which means that MI itself is a packet. - bool isSoloInstruction(const MachineInstr *MI) override; + bool isSoloInstruction(const MachineInstr &MI) override; // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ // together. @@ -65,9 +65,10 @@ public: // and SUJ. bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override; - MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override; - void endPacket(MachineBasicBlock *MBB, MachineInstr *MI) override; - bool shouldAddToPacket(const MachineInstr *MI) override; + MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override; + void endPacket(MachineBasicBlock *MBB, + MachineBasicBlock::iterator MI) override; + bool shouldAddToPacket(const MachineInstr &MI) override; void unpacketizeSoloInstrs(MachineFunction &MF); @@ -93,7 +94,7 @@ protected: bool canPromoteToNewValueStore(const MachineInstr* MI, const MachineInstr* PacketMI, unsigned DepReg); bool demoteToDotOld(MachineInstr* MI); - bool arePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2); + bool arePredicatesComplements(MachineInstr &MI1, MachineInstr &MI2); bool restrictingDepExistInPacket(MachineInstr*, unsigned); bool isNewifiable(const MachineInstr *MI); bool isCurifiable(MachineInstr* MI); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index b73af8249cb5..2898b056a03d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -11,7 +11,10 @@ #include "HexagonFixupKinds.h" #include "HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonBaseInfo.h" 
+#include "MCTargetDesc/HexagonMCChecker.h" +#include "MCTargetDesc/HexagonMCCodeEmitter.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCShuffler.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -19,14 +22,20 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" +#include + using namespace llvm; using namespace Hexagon; #define DEBUG_TYPE "hexagon-asm-backend" +static cl::opt DisableFixup + ("mno-fixup", cl::desc("Disable fixing up resolved relocations for Hexagon")); + namespace { class HexagonAsmBackend : public MCAsmBackend { @@ -36,8 +45,21 @@ class HexagonAsmBackend : public MCAsmBackend { std::unique_ptr MCII; std::unique_ptr RelaxTarget; MCInst * Extender; + + void ReplaceInstruction(MCCodeEmitter &E, MCRelaxableFragment &RF, + MCInst &HMB) const { + SmallVector Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + E.encodeInstruction(HMB, VecOS, Fixups, RF.getSubtargetInfo()); + + // Update the fragment. + RF.setInst(HMB); + RF.getContents() = Code; + RF.getFixups() = Fixups; + } public: - HexagonAsmBackend(Target const &T, uint8_t OSABI, StringRef CPU) : + HexagonAsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) : OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *), Extender(nullptr) {} @@ -63,118 +85,438 @@ public: const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { const static MCFixupKindInfo Infos[Hexagon::NumTargetFixupKinds] = { - // This table *must* be in same the order of fixup_* kinds in - // HexagonFixupKinds.h. 
- // - // namei offset bits flags - {"fixup_Hexagon_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_B15_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_B7_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_LO16", 0, 32, 0}, - {"fixup_Hexagon_HI16", 0, 32, 0}, - {"fixup_Hexagon_32", 0, 32, 0}, - {"fixup_Hexagon_16", 0, 32, 0}, - {"fixup_Hexagon_8", 0, 32, 0}, - {"fixup_Hexagon_GPREL16_0", 0, 32, 0}, - {"fixup_Hexagon_GPREL16_1", 0, 32, 0}, - {"fixup_Hexagon_GPREL16_2", 0, 32, 0}, - {"fixup_Hexagon_GPREL16_3", 0, 32, 0}, - {"fixup_Hexagon_HL16", 0, 32, 0}, - {"fixup_Hexagon_B13_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_B9_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_B32_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_32_6_X", 0, 32, 0}, - {"fixup_Hexagon_B22_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_B15_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_B13_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_B9_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_B7_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_16_X", 0, 32, 0}, - {"fixup_Hexagon_12_X", 0, 32, 0}, - {"fixup_Hexagon_11_X", 0, 32, 0}, - {"fixup_Hexagon_10_X", 0, 32, 0}, - {"fixup_Hexagon_9_X", 0, 32, 0}, - {"fixup_Hexagon_8_X", 0, 32, 0}, - {"fixup_Hexagon_7_X", 0, 32, 0}, - {"fixup_Hexagon_6_X", 0, 32, 0}, - {"fixup_Hexagon_32_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_COPY", 0, 32, 0}, - {"fixup_Hexagon_GLOB_DAT", 0, 32, 0}, - {"fixup_Hexagon_JMP_SLOT", 0, 32, 0}, - {"fixup_Hexagon_RELATIVE", 0, 32, 0}, - {"fixup_Hexagon_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_GOTREL_LO16", 0, 32, 0}, - {"fixup_Hexagon_GOTREL_HI16", 0, 32, 0}, - {"fixup_Hexagon_GOTREL_32", 0, 32, 0}, - {"fixup_Hexagon_GOT_LO16", 0, 32, 0}, - {"fixup_Hexagon_GOT_HI16", 0, 32, 0}, - 
{"fixup_Hexagon_GOT_32", 0, 32, 0}, - {"fixup_Hexagon_GOT_16", 0, 32, 0}, - {"fixup_Hexagon_DTPMOD_32", 0, 32, 0}, - {"fixup_Hexagon_DTPREL_LO16", 0, 32, 0}, - {"fixup_Hexagon_DTPREL_HI16", 0, 32, 0}, - {"fixup_Hexagon_DTPREL_32", 0, 32, 0}, - {"fixup_Hexagon_DTPREL_16", 0, 32, 0}, - {"fixup_Hexagon_GD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_LD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_GD_GOT_LO16", 0, 32, 0}, - {"fixup_Hexagon_GD_GOT_HI16", 0, 32, 0}, - {"fixup_Hexagon_GD_GOT_32", 0, 32, 0}, - {"fixup_Hexagon_GD_GOT_16", 0, 32, 0}, - {"fixup_Hexagon_LD_GOT_LO16", 0, 32, 0}, - {"fixup_Hexagon_LD_GOT_HI16", 0, 32, 0}, - {"fixup_Hexagon_LD_GOT_32", 0, 32, 0}, - {"fixup_Hexagon_LD_GOT_16", 0, 32, 0}, - {"fixup_Hexagon_IE_LO16", 0, 32, 0}, - {"fixup_Hexagon_IE_HI16", 0, 32, 0}, - {"fixup_Hexagon_IE_32", 0, 32, 0}, - {"fixup_Hexagon_IE_16", 0, 32, 0}, - {"fixup_Hexagon_IE_GOT_LO16", 0, 32, 0}, - {"fixup_Hexagon_IE_GOT_HI16", 0, 32, 0}, - {"fixup_Hexagon_IE_GOT_32", 0, 32, 0}, - {"fixup_Hexagon_IE_GOT_16", 0, 32, 0}, - {"fixup_Hexagon_TPREL_LO16", 0, 32, 0}, - {"fixup_Hexagon_TPREL_HI16", 0, 32, 0}, - {"fixup_Hexagon_TPREL_32", 0, 32, 0}, - {"fixup_Hexagon_TPREL_16", 0, 32, 0}, - {"fixup_Hexagon_6_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, - {"fixup_Hexagon_GOTREL_32_6_X", 0, 32, 0}, - {"fixup_Hexagon_GOTREL_16_X", 0, 32, 0}, - {"fixup_Hexagon_GOTREL_11_X", 0, 32, 0}, - {"fixup_Hexagon_GOT_32_6_X", 0, 32, 0}, - {"fixup_Hexagon_GOT_16_X", 0, 32, 0}, - {"fixup_Hexagon_GOT_11_X", 0, 32, 0}, - {"fixup_Hexagon_DTPREL_32_6_X", 0, 32, 0}, - {"fixup_Hexagon_DTPREL_16_X", 0, 32, 0}, - {"fixup_Hexagon_DTPREL_11_X", 0, 32, 0}, - {"fixup_Hexagon_GD_GOT_32_6_X", 0, 32, 0}, - {"fixup_Hexagon_GD_GOT_16_X", 0, 32, 0}, - {"fixup_Hexagon_GD_GOT_11_X", 0, 32, 0}, - {"fixup_Hexagon_LD_GOT_32_6_X", 0, 32, 0}, - {"fixup_Hexagon_LD_GOT_16_X", 0, 32, 0}, - {"fixup_Hexagon_LD_GOT_11_X", 0, 32, 0}, - {"fixup_Hexagon_IE_32_6_X", 0, 
32, 0}, - {"fixup_Hexagon_IE_16_X", 0, 32, 0}, - {"fixup_Hexagon_IE_GOT_32_6_X", 0, 32, 0}, - {"fixup_Hexagon_IE_GOT_16_X", 0, 32, 0}, - {"fixup_Hexagon_IE_GOT_11_X", 0, 32, 0}, - {"fixup_Hexagon_TPREL_32_6_X", 0, 32, 0}, - {"fixup_Hexagon_TPREL_16_X", 0, 32, 0}, - {"fixup_Hexagon_TPREL_11_X", 0, 32, 0}}; - - if (Kind < FirstTargetFixupKind) { + // This table *must* be in same the order of fixup_* kinds in + // HexagonFixupKinds.h. + // + // namei offset bits flags + { "fixup_Hexagon_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_B15_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_B7_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_LO16", 0, 32, 0 }, + { "fixup_Hexagon_HI16", 0, 32, 0 }, + { "fixup_Hexagon_32", 0, 32, 0 }, + { "fixup_Hexagon_16", 0, 32, 0 }, + { "fixup_Hexagon_8", 0, 32, 0 }, + { "fixup_Hexagon_GPREL16_0", 0, 32, 0 }, + { "fixup_Hexagon_GPREL16_1", 0, 32, 0 }, + { "fixup_Hexagon_GPREL16_2", 0, 32, 0 }, + { "fixup_Hexagon_GPREL16_3", 0, 32, 0 }, + { "fixup_Hexagon_HL16", 0, 32, 0 }, + { "fixup_Hexagon_B13_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_B9_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_B32_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_32_6_X", 0, 32, 0 }, + { "fixup_Hexagon_B22_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_B15_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_B13_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_B9_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_B7_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_16_X", 0, 32, 0 }, + { "fixup_Hexagon_12_X", 0, 32, 0 }, + { "fixup_Hexagon_11_X", 0, 32, 0 }, + { "fixup_Hexagon_10_X", 0, 32, 0 }, + { "fixup_Hexagon_9_X", 0, 32, 0 }, + { "fixup_Hexagon_8_X", 0, 32, 0 }, + { "fixup_Hexagon_7_X", 0, 32, 0 }, + { "fixup_Hexagon_6_X", 0, 32, 0 }, + { 
"fixup_Hexagon_32_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_COPY", 0, 32, 0 }, + { "fixup_Hexagon_GLOB_DAT", 0, 32, 0 }, + { "fixup_Hexagon_JMP_SLOT", 0, 32, 0 }, + { "fixup_Hexagon_RELATIVE", 0, 32, 0 }, + { "fixup_Hexagon_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_GOTREL_LO16", 0, 32, 0 }, + { "fixup_Hexagon_GOTREL_HI16", 0, 32, 0 }, + { "fixup_Hexagon_GOTREL_32", 0, 32, 0 }, + { "fixup_Hexagon_GOT_LO16", 0, 32, 0 }, + { "fixup_Hexagon_GOT_HI16", 0, 32, 0 }, + { "fixup_Hexagon_GOT_32", 0, 32, 0 }, + { "fixup_Hexagon_GOT_16", 0, 32, 0 }, + { "fixup_Hexagon_DTPMOD_32", 0, 32, 0 }, + { "fixup_Hexagon_DTPREL_LO16", 0, 32, 0 }, + { "fixup_Hexagon_DTPREL_HI16", 0, 32, 0 }, + { "fixup_Hexagon_DTPREL_32", 0, 32, 0 }, + { "fixup_Hexagon_DTPREL_16", 0, 32, 0 }, + { "fixup_Hexagon_GD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_LD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_GD_GOT_LO16", 0, 32, 0 }, + { "fixup_Hexagon_GD_GOT_HI16", 0, 32, 0 }, + { "fixup_Hexagon_GD_GOT_32", 0, 32, 0 }, + { "fixup_Hexagon_GD_GOT_16", 0, 32, 0 }, + { "fixup_Hexagon_LD_GOT_LO16", 0, 32, 0 }, + { "fixup_Hexagon_LD_GOT_HI16", 0, 32, 0 }, + { "fixup_Hexagon_LD_GOT_32", 0, 32, 0 }, + { "fixup_Hexagon_LD_GOT_16", 0, 32, 0 }, + { "fixup_Hexagon_IE_LO16", 0, 32, 0 }, + { "fixup_Hexagon_IE_HI16", 0, 32, 0 }, + { "fixup_Hexagon_IE_32", 0, 32, 0 }, + { "fixup_Hexagon_IE_16", 0, 32, 0 }, + { "fixup_Hexagon_IE_GOT_LO16", 0, 32, 0 }, + { "fixup_Hexagon_IE_GOT_HI16", 0, 32, 0 }, + { "fixup_Hexagon_IE_GOT_32", 0, 32, 0 }, + { "fixup_Hexagon_IE_GOT_16", 0, 32, 0 }, + { "fixup_Hexagon_TPREL_LO16", 0, 32, 0 }, + { "fixup_Hexagon_TPREL_HI16", 0, 32, 0 }, + { "fixup_Hexagon_TPREL_32", 0, 32, 0 }, + { "fixup_Hexagon_TPREL_16", 0, 32, 0 }, + { "fixup_Hexagon_6_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_GOTREL_32_6_X", 0, 32, 0 }, + { "fixup_Hexagon_GOTREL_16_X", 0, 32, 0 }, + { 
"fixup_Hexagon_GOTREL_11_X", 0, 32, 0 }, + { "fixup_Hexagon_GOT_32_6_X", 0, 32, 0 }, + { "fixup_Hexagon_GOT_16_X", 0, 32, 0 }, + { "fixup_Hexagon_GOT_11_X", 0, 32, 0 }, + { "fixup_Hexagon_DTPREL_32_6_X", 0, 32, 0 }, + { "fixup_Hexagon_DTPREL_16_X", 0, 32, 0 }, + { "fixup_Hexagon_DTPREL_11_X", 0, 32, 0 }, + { "fixup_Hexagon_GD_GOT_32_6_X", 0, 32, 0 }, + { "fixup_Hexagon_GD_GOT_16_X", 0, 32, 0 }, + { "fixup_Hexagon_GD_GOT_11_X", 0, 32, 0 }, + { "fixup_Hexagon_LD_GOT_32_6_X", 0, 32, 0 }, + { "fixup_Hexagon_LD_GOT_16_X", 0, 32, 0 }, + { "fixup_Hexagon_LD_GOT_11_X", 0, 32, 0 }, + { "fixup_Hexagon_IE_32_6_X", 0, 32, 0 }, + { "fixup_Hexagon_IE_16_X", 0, 32, 0 }, + { "fixup_Hexagon_IE_GOT_32_6_X", 0, 32, 0 }, + { "fixup_Hexagon_IE_GOT_16_X", 0, 32, 0 }, + { "fixup_Hexagon_IE_GOT_11_X", 0, 32, 0 }, + { "fixup_Hexagon_TPREL_32_6_X", 0, 32, 0 }, + { "fixup_Hexagon_TPREL_16_X", 0, 32, 0 }, + { "fixup_Hexagon_TPREL_11_X", 0, 32, 0 } + }; + + if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); - } assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); return Infos[Kind - FirstTargetFixupKind]; } - void applyFixup(MCFixup const & /*Fixup*/, char * /*Data*/, - unsigned /*DataSize*/, uint64_t /*Value*/, - bool /*IsPCRel*/) const override { - return; + /// processFixupValue - Target hook to adjust the literal value of a fixup + /// if necessary. IsResolved signals whether the caller believes a relocation + /// is needed; the target can modify the value. The default does nothing. 
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + const MCValue &Target, uint64_t &Value, + bool &IsResolved) override { + MCFixupKind Kind = Fixup.getKind(); + + switch((unsigned)Kind) { + default: + llvm_unreachable("Unknown Fixup Kind!"); + + case fixup_Hexagon_LO16: + case fixup_Hexagon_HI16: + case fixup_Hexagon_16: + case fixup_Hexagon_8: + case fixup_Hexagon_GPREL16_0: + case fixup_Hexagon_GPREL16_1: + case fixup_Hexagon_GPREL16_2: + case fixup_Hexagon_GPREL16_3: + case fixup_Hexagon_HL16: + case fixup_Hexagon_32_6_X: + case fixup_Hexagon_16_X: + case fixup_Hexagon_12_X: + case fixup_Hexagon_11_X: + case fixup_Hexagon_10_X: + case fixup_Hexagon_9_X: + case fixup_Hexagon_8_X: + case fixup_Hexagon_7_X: + case fixup_Hexagon_6_X: + case fixup_Hexagon_COPY: + case fixup_Hexagon_GLOB_DAT: + case fixup_Hexagon_JMP_SLOT: + case fixup_Hexagon_RELATIVE: + case fixup_Hexagon_PLT_B22_PCREL: + case fixup_Hexagon_GOTREL_LO16: + case fixup_Hexagon_GOTREL_HI16: + case fixup_Hexagon_GOTREL_32: + case fixup_Hexagon_GOT_LO16: + case fixup_Hexagon_GOT_HI16: + case fixup_Hexagon_GOT_32: + case fixup_Hexagon_GOT_16: + case fixup_Hexagon_DTPMOD_32: + case fixup_Hexagon_DTPREL_LO16: + case fixup_Hexagon_DTPREL_HI16: + case fixup_Hexagon_DTPREL_32: + case fixup_Hexagon_DTPREL_16: + case fixup_Hexagon_GD_PLT_B22_PCREL: + case fixup_Hexagon_LD_PLT_B22_PCREL: + case fixup_Hexagon_GD_GOT_LO16: + case fixup_Hexagon_GD_GOT_HI16: + case fixup_Hexagon_GD_GOT_32: + case fixup_Hexagon_GD_GOT_16: + case fixup_Hexagon_LD_GOT_LO16: + case fixup_Hexagon_LD_GOT_HI16: + case fixup_Hexagon_LD_GOT_32: + case fixup_Hexagon_LD_GOT_16: + case fixup_Hexagon_IE_LO16: + case fixup_Hexagon_IE_HI16: + case fixup_Hexagon_IE_32: + case fixup_Hexagon_IE_16: + case fixup_Hexagon_IE_GOT_LO16: + case fixup_Hexagon_IE_GOT_HI16: + case fixup_Hexagon_IE_GOT_32: + case fixup_Hexagon_IE_GOT_16: + case fixup_Hexagon_TPREL_LO16: + case 
fixup_Hexagon_TPREL_HI16: + case fixup_Hexagon_TPREL_32: + case fixup_Hexagon_TPREL_16: + case fixup_Hexagon_GOTREL_32_6_X: + case fixup_Hexagon_GOTREL_16_X: + case fixup_Hexagon_GOTREL_11_X: + case fixup_Hexagon_GOT_32_6_X: + case fixup_Hexagon_GOT_16_X: + case fixup_Hexagon_GOT_11_X: + case fixup_Hexagon_DTPREL_32_6_X: + case fixup_Hexagon_DTPREL_16_X: + case fixup_Hexagon_DTPREL_11_X: + case fixup_Hexagon_GD_GOT_32_6_X: + case fixup_Hexagon_GD_GOT_16_X: + case fixup_Hexagon_GD_GOT_11_X: + case fixup_Hexagon_LD_GOT_32_6_X: + case fixup_Hexagon_LD_GOT_16_X: + case fixup_Hexagon_LD_GOT_11_X: + case fixup_Hexagon_IE_32_6_X: + case fixup_Hexagon_IE_16_X: + case fixup_Hexagon_IE_GOT_32_6_X: + case fixup_Hexagon_IE_GOT_16_X: + case fixup_Hexagon_IE_GOT_11_X: + case fixup_Hexagon_TPREL_32_6_X: + case fixup_Hexagon_TPREL_16_X: + case fixup_Hexagon_TPREL_11_X: + case fixup_Hexagon_32_PCREL: + case fixup_Hexagon_6_PCREL_X: + case fixup_Hexagon_23_REG: + // These relocations should always have a relocation recorded + IsResolved = false; + return; + + case fixup_Hexagon_B22_PCREL: + //IsResolved = false; + break; + + case fixup_Hexagon_B13_PCREL: + case fixup_Hexagon_B13_PCREL_X: + case fixup_Hexagon_B32_PCREL_X: + case fixup_Hexagon_B22_PCREL_X: + case fixup_Hexagon_B15_PCREL: + case fixup_Hexagon_B15_PCREL_X: + case fixup_Hexagon_B9_PCREL: + case fixup_Hexagon_B9_PCREL_X: + case fixup_Hexagon_B7_PCREL: + case fixup_Hexagon_B7_PCREL_X: + if (DisableFixup) + IsResolved = false; + break; + + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + case FK_PCRel_4: + case fixup_Hexagon_32: + // Leave these relocations alone as they are used for EH. + return; + } + } + + /// getFixupKindNumBytes - The number of bytes the fixup may change. 
+ static unsigned getFixupKindNumBytes(unsigned Kind) { + switch (Kind) { + default: + return 0; + + case FK_Data_1: + return 1; + case FK_Data_2: + return 2; + case FK_Data_4: // this later gets mapped to R_HEX_32 + case FK_PCRel_4: // this later gets mapped to R_HEX_32_PCREL + case fixup_Hexagon_32: + case fixup_Hexagon_B32_PCREL_X: + case fixup_Hexagon_B22_PCREL: + case fixup_Hexagon_B22_PCREL_X: + case fixup_Hexagon_B15_PCREL: + case fixup_Hexagon_B15_PCREL_X: + case fixup_Hexagon_B13_PCREL: + case fixup_Hexagon_B13_PCREL_X: + case fixup_Hexagon_B9_PCREL: + case fixup_Hexagon_B9_PCREL_X: + case fixup_Hexagon_B7_PCREL: + case fixup_Hexagon_B7_PCREL_X: + return 4; + } + } + + // Make up for left shift when encoding the operand. + static uint64_t adjustFixupValue(MCFixupKind Kind, uint64_t Value) { + switch((unsigned)Kind) { + default: + break; + + case fixup_Hexagon_B7_PCREL: + case fixup_Hexagon_B9_PCREL: + case fixup_Hexagon_B13_PCREL: + case fixup_Hexagon_B15_PCREL: + case fixup_Hexagon_B22_PCREL: + Value >>= 2; + break; + + case fixup_Hexagon_B7_PCREL_X: + case fixup_Hexagon_B9_PCREL_X: + case fixup_Hexagon_B13_PCREL_X: + case fixup_Hexagon_B15_PCREL_X: + case fixup_Hexagon_B22_PCREL_X: + Value &= 0x3f; + break; + + case fixup_Hexagon_B32_PCREL_X: + Value >>= 6; + break; + } + return (Value); + } + + void HandleFixupError(const int bits, const int align_bits, + const int64_t FixupValue, const char *fixupStr) const { + // Error: value 1124 out of range: -1024-1023 when resolving + // symbol in file xprtsock.S + const APInt IntMin = APInt::getSignedMinValue(bits+align_bits); + const APInt IntMax = APInt::getSignedMaxValue(bits+align_bits); + std::stringstream errStr; + errStr << "\nError: value " << + FixupValue << + " out of range: " << + IntMin.getSExtValue() << + "-" << + IntMax.getSExtValue() << + " when resolving " << + fixupStr << + " fixup\n"; + llvm_unreachable(errStr.str().c_str()); + } + + /// ApplyFixup - Apply the \arg Value for given \arg Fixup 
into the provided + /// data fragment, at the offset specified by the fixup and following the + /// fixup kind as appropriate. + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t FixupValue, bool IsPCRel) const override { + + // When FixupValue is 0 the relocation is external and there + // is nothing for us to do. + if (!FixupValue) return; + + MCFixupKind Kind = Fixup.getKind(); + uint64_t Value; + uint32_t InstMask; + uint32_t Reloc; + + // LLVM gives us an encoded value, we have to convert it back + // to a real offset before we can use it. + uint32_t Offset = Fixup.getOffset(); + unsigned NumBytes = getFixupKindNumBytes(Kind); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + char *InstAddr = Data + Offset; + + Value = adjustFixupValue(Kind, FixupValue); + if(!Value) + return; + int sValue = (int)Value; + + switch((unsigned)Kind) { + default: + return; + + case fixup_Hexagon_B7_PCREL: + if (!(isIntN(7, sValue))) + HandleFixupError(7, 2, (int64_t)FixupValue, "B7_PCREL"); + case fixup_Hexagon_B7_PCREL_X: + InstMask = 0x00001f18; // Word32_B7 + Reloc = (((Value >> 2) & 0x1f) << 8) | // Value 6-2 = Target 12-8 + ((Value & 0x3) << 3); // Value 1-0 = Target 4-3 + break; + + case fixup_Hexagon_B9_PCREL: + if (!(isIntN(9, sValue))) + HandleFixupError(9, 2, (int64_t)FixupValue, "B9_PCREL"); + case fixup_Hexagon_B9_PCREL_X: + InstMask = 0x003000fe; // Word32_B9 + Reloc = (((Value >> 7) & 0x3) << 20) | // Value 8-7 = Target 21-20 + ((Value & 0x7f) << 1); // Value 6-0 = Target 7-1 + break; + + // Since the existing branches that use this relocation cannot be + // extended, they should only be fixed up if the target is within range. 
+ case fixup_Hexagon_B13_PCREL: + if (!(isIntN(13, sValue))) + HandleFixupError(13, 2, (int64_t)FixupValue, "B13_PCREL"); + case fixup_Hexagon_B13_PCREL_X: + InstMask = 0x00202ffe; // Word32_B13 + Reloc = (((Value >> 12) & 0x1) << 21) | // Value 12 = Target 21 + (((Value >> 11) & 0x1) << 13) | // Value 11 = Target 13 + ((Value & 0x7ff) << 1); // Value 10-0 = Target 11-1 + break; + + case fixup_Hexagon_B15_PCREL: + if (!(isIntN(15, sValue))) + HandleFixupError(15, 2, (int64_t)FixupValue, "B15_PCREL"); + case fixup_Hexagon_B15_PCREL_X: + InstMask = 0x00df20fe; // Word32_B15 + Reloc = (((Value >> 13) & 0x3) << 22) | // Value 14-13 = Target 23-22 + (((Value >> 8) & 0x1f) << 16) | // Value 12-8 = Target 20-16 + (((Value >> 7) & 0x1) << 13) | // Value 7 = Target 13 + ((Value & 0x7f) << 1); // Value 6-0 = Target 7-1 + break; + + case fixup_Hexagon_B22_PCREL: + if (!(isIntN(22, sValue))) + HandleFixupError(22, 2, (int64_t)FixupValue, "B22_PCREL"); + case fixup_Hexagon_B22_PCREL_X: + InstMask = 0x01ff3ffe; // Word32_B22 + Reloc = (((Value >> 13) & 0x1ff) << 16) | // Value 21-13 = Target 24-16 + ((Value & 0x1fff) << 1); // Value 12-0 = Target 13-1 + break; + + case fixup_Hexagon_B32_PCREL_X: + InstMask = 0x0fff3fff; // Word32_X26 + Reloc = (((Value >> 14) & 0xfff) << 16) | // Value 25-14 = Target 27-16 + (Value & 0x3fff); // Value 13-0 = Target 13-0 + break; + + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + case fixup_Hexagon_32: + InstMask = 0xffffffff; // Word32 + Reloc = Value; + break; + } + + DEBUG(dbgs() << "Name=" << getFixupKindInfo(Kind).Name << "(" << + (unsigned)Kind << ")\n"); + DEBUG(uint32_t OldData = 0; + for (unsigned i = 0; i < NumBytes; i++) + OldData |= (InstAddr[i] << (i * 8)) & (0xff << (i * 8)); + dbgs() << "\tBValue=0x"; dbgs().write_hex(Value) << + ": AValue=0x"; dbgs().write_hex(FixupValue) << + ": Offset=" << Offset << + ": Size=" << DataSize << + ": OInst=0x"; dbgs().write_hex(OldData) << + ": Reloc=0x"; dbgs().write_hex(Reloc);); + + // 
For each byte of the fragment that the fixup touches, mask in the + // bits from the fixup value. The Value has been "split up" into the + // appropriate bitfields above. + for (unsigned i = 0; i < NumBytes; i++){ + InstAddr[i] &= uint8_t(~InstMask >> (i * 8)) & 0xff; // Clear reloc bits + InstAddr[i] |= uint8_t(Reloc >> (i * 8)) & 0xff; // Apply new reloc + } + + DEBUG(uint32_t NewData = 0; + for (unsigned i = 0; i < NumBytes; i++) + NewData |= (InstAddr[i] << (i * 8)) & (0xff << (i * 8)); + dbgs() << ": NInst=0x"; dbgs().write_hex(NewData) << "\n";); } bool isInstRelaxable(MCInst const &HMI) const { @@ -182,12 +524,20 @@ public: bool Relaxable = false; // Branches and loop-setup insns are handled as necessary by relaxation. if (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeJ || + (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == + HexagonII::TypeCOMPOUND && + MCID.isBranch()) || (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeNV && MCID.isBranch()) || (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeCR && HMI.getOpcode() != Hexagon::C4_addipc)) - if (HexagonMCInstrInfo::isExtendable(*MCII, HMI)) + if (HexagonMCInstrInfo::isExtendable(*MCII, HMI)) { Relaxable = true; + MCOperand const &Operand = + HMI.getOperand(HexagonMCInstrInfo::getExtendableOp(*MCII, HMI)); + if (HexagonMCInstrInfo::mustNotExtend(*Operand.getExpr())) + Relaxable = false; + } return Relaxable; } @@ -197,17 +547,7 @@ public: /// /// \param Inst - The instruction to test. 
bool mayNeedRelaxation(MCInst const &Inst) const override { - assert(HexagonMCInstrInfo::isBundle(Inst)); - bool PreviousIsExtender = false; - for (auto const &I : HexagonMCInstrInfo::bundleInstructions(Inst)) { - auto const &Inst = *I.getInst(); - if (!PreviousIsExtender) { - if (isInstRelaxable(Inst)) - return true; - } - PreviousIsExtender = HexagonMCInstrInfo::isImmext(Inst); - } - return false; + return true; } /// fixupNeedsRelaxation - Target specific predicate for whether a given @@ -222,6 +562,9 @@ public: *RelaxTarget = nullptr; MCInst &MCI = const_cast(HexagonMCInstrInfo::instruction( MCB, Fixup.getOffset() / HEXAGON_INSTR_SIZE)); + bool Relaxable = isInstRelaxable(MCI); + if (Relaxable == false) + return false; // If we cannot resolve the fixup value, it requires relaxation. if (!Resolved) { switch ((unsigned)Fixup.getKind()) { @@ -247,9 +590,6 @@ public: } } } - bool Relaxable = isInstRelaxable(MCI); - if (Relaxable == false) - return false; MCFixupKind Kind = Fixup.getKind(); int64_t sValue = Value; @@ -294,8 +634,8 @@ public: llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced"); } - void relaxInstruction(MCInst const & Inst, - MCInst & Res) const override { + void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + MCInst &Res) const override { assert(HexagonMCInstrInfo::isBundle(Inst) && "Hexagon relaxInstruction only works on bundles"); @@ -347,6 +687,58 @@ public: } return true; } + + void finishLayout(MCAssembler const &Asm, + MCAsmLayout &Layout) const override { + for (auto I : Layout.getSectionOrder()) { + auto &Fragments = I->getFragmentList(); + for (auto &J : Fragments) { + switch (J.getKind()) { + default: + break; + case MCFragment::FT_Align: { + auto Size = Asm.computeFragmentSize(Layout, J); + for (auto K = J.getIterator(); + K != Fragments.begin() && Size >= HEXAGON_PACKET_SIZE;) { + --K; + switch (K->getKind()) { + default: + break; + case MCFragment::FT_Align: { + // Don't pad before other alignments + Size = 0; 
+ break; + } + case MCFragment::FT_Relaxable: { + auto &RF = cast(*K); + auto &Inst = const_cast(RF.getInst()); + while (Size > 0 && HexagonMCInstrInfo::bundleSize(Inst) < 4) { + MCInst *Nop = new (Asm.getContext()) MCInst; + Nop->setOpcode(Hexagon::A2_nop); + Inst.addOperand(MCOperand::createInst(Nop)); + Size -= 4; + if (!HexagonMCChecker( + *MCII, RF.getSubtargetInfo(), Inst, Inst, + *Asm.getContext().getRegisterInfo()).check()) { + Inst.erase(Inst.end() - 1); + Size = 0; + } + } + bool Error = HexagonMCShuffle(*MCII, RF.getSubtargetInfo(), Inst); + //assert(!Error); + (void)Error; + ReplaceInstruction(Asm.getEmitter(), RF, Inst); + Layout.invalidateFragmentsFrom(&RF); + Size = 0; // Only look back one instruction + break; + } + } + } + } + } + } + } + } }; } // end anonymous namespace diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 47a6f8636276..c63f044b7128 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -227,7 +227,27 @@ namespace HexagonII { MO_LO16, MO_HI16, // Offset from the base of the SDA. - MO_GPREL + MO_GPREL, + + // MO_GDGOT - indicates GOT relative relocation for TLS + // GeneralDynamic method + MO_GDGOT, + + // MO_GDPLT - indicates PLT relative relocation for TLS + // GeneralDynamic method + MO_GDPLT, + + // MO_IE - indicates non PIC relocation for TLS + // Initial Executable method + MO_IE, + + // MO_IEGOT - indicates PIC relocation for TLS + // Initial Executable method + MO_IEGOT, + + // MO_TPREL - indicates relocation for TLS + // local Executable method + MO_TPREL }; // Hexagon Sub-instruction classes. 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index da5d4d1da69b..944e235e72f2 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/HexagonFixupKinds.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -28,8 +29,8 @@ private: public: HexagonELFObjectWriter(uint8_t OSABI, StringRef C); - unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup, - bool IsPCRel) const override; + unsigned getRelocType(MCContext &Ctx, MCValue const &Target, + MCFixup const &Fixup, bool IsPCRel) const override; }; } @@ -38,20 +39,61 @@ HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C) /*HasRelocationAddend*/ true), CPU(C) {} -unsigned HexagonELFObjectWriter::GetRelocType(MCValue const & /*Target*/, +unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx, + MCValue const &Target, MCFixup const &Fixup, bool IsPCRel) const { + MCSymbolRefExpr::VariantKind Variant = Target.getAccessVariant(); switch ((unsigned)Fixup.getKind()) { default: - DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n"); - llvm_unreachable("Unimplemented Fixup kind!"); - return ELF::R_HEX_NONE; + report_fatal_error("Unrecognized relocation type"); + break; case FK_Data_4: - return (IsPCRel) ? 
ELF::R_HEX_32_PCREL : ELF::R_HEX_32; + switch(Variant) { + case MCSymbolRefExpr::VariantKind::VK_DTPREL: + return ELF::R_HEX_DTPREL_32; + case MCSymbolRefExpr::VariantKind::VK_GOT: + return ELF::R_HEX_GOT_32; + case MCSymbolRefExpr::VariantKind::VK_GOTREL: + return ELF::R_HEX_GOTREL_32; + case MCSymbolRefExpr::VariantKind::VK_Hexagon_GD_GOT: + return ELF::R_HEX_GD_GOT_32; + case MCSymbolRefExpr::VariantKind::VK_Hexagon_IE: + return ELF::R_HEX_IE_32; + case MCSymbolRefExpr::VariantKind::VK_Hexagon_IE_GOT: + return ELF::R_HEX_IE_GOT_32; + case MCSymbolRefExpr::VariantKind::VK_Hexagon_LD_GOT: + return ELF::R_HEX_LD_GOT_32; + case MCSymbolRefExpr::VariantKind::VK_Hexagon_PCREL: + return ELF::R_HEX_32_PCREL; + case MCSymbolRefExpr::VariantKind::VK_TPREL: + return ELF::R_HEX_TPREL_32; + case MCSymbolRefExpr::VariantKind::VK_None: + return IsPCRel ? ELF::R_HEX_32_PCREL : ELF::R_HEX_32; + default: + report_fatal_error("Unrecognized variant type"); + }; case FK_PCRel_4: return ELF::R_HEX_32_PCREL; case FK_Data_2: - return ELF::R_HEX_16; + switch(Variant) { + case MCSymbolRefExpr::VariantKind::VK_DTPREL: + return ELF::R_HEX_DTPREL_16; + case MCSymbolRefExpr::VariantKind::VK_GOT: + return ELF::R_HEX_GOT_16; + case MCSymbolRefExpr::VariantKind::VK_Hexagon_GD_GOT: + return ELF::R_HEX_GD_GOT_16; + case MCSymbolRefExpr::VariantKind::VK_Hexagon_IE_GOT: + return ELF::R_HEX_IE_GOT_16; + case MCSymbolRefExpr::VariantKind::VK_Hexagon_LD_GOT: + return ELF::R_HEX_LD_GOT_16; + case MCSymbolRefExpr::VariantKind::VK_TPREL: + return ELF::R_HEX_TPREL_16; + case MCSymbolRefExpr::VariantKind::VK_None: + return ELF::R_HEX_16; + default: + report_fatal_error("Unrecognized variant type"); + }; case FK_Data_1: return ELF::R_HEX_8; case fixup_Hexagon_B22_PCREL: @@ -240,6 +282,8 @@ unsigned HexagonELFObjectWriter::GetRelocType(MCValue const & /*Target*/, return ELF::R_HEX_TPREL_16_X; case fixup_Hexagon_TPREL_11_X: return ELF::R_HEX_TPREL_11_X; + case fixup_Hexagon_23_REG: + return ELF::R_HEX_23_REG; 
} } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h index 4bbfbec883c4..4c97ebbdd346 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h @@ -110,6 +110,7 @@ enum Fixups { fixup_Hexagon_TPREL_32_6_X, fixup_Hexagon_TPREL_16_X, fixup_Hexagon_TPREL_11_X, + fixup_Hexagon_23_REG, LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index 06ccec532211..42fcc5a6aa89 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -79,7 +79,6 @@ void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, } if (HexagonMCInstrInfo::isOuterLoop(*MI)) { OS << Separator; - Separator = " "; MCInst ME; ME.setOpcode(Hexagon::ENDLOOP1); printInstruction(&ME, OS); @@ -203,16 +202,11 @@ void HexagonInstPrinter::printPredicateOperand(MCInst const *MI, unsigned OpNo, void HexagonInstPrinter::printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O, bool hi) const { - MCOperand const &MO = MI->getOperand(OpNo); + assert(MI->getOperand(OpNo).isImm() && "Unknown symbol operand"); O << '#' << (hi ? 
"HI" : "LO") << '('; - if (MO.isImm()) { - O << '#'; - printOperand(MI, OpNo, O); - } else { - printOperand(MI, OpNo, O); - assert("Unknown symbol operand"); - } + O << '#'; + printOperand(MI, OpNo, O); O << ')'; } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp index 51d2f1c878dc..9e2c28076432 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -32,6 +32,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) { AscizDirective = "\t.string\t"; SupportsDebugInformation = true; + MinInstAlignment = 4; UsesELFSectionDirectiveForBSS = true; ExceptionsType = ExceptionHandling::DwarfCFI; } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h index a8456b4ead9c..efeff2436234 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h @@ -14,7 +14,6 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCASMINFO_H #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCASMINFO_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index 46b7b41fec3b..07c9ad96a0d7 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -16,7 +16,6 @@ #include "HexagonBaseInfo.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/CommandLine.h" @@ -117,6 +116,11 @@ void HexagonMCChecker::init(MCInst const& MCI) { for (unsigned i = 0; i < MCID.getNumDefs(); ++i) { unsigned R = MCI.getOperand(i).getReg(), S = Hexagon::NoRegister; + // USR has subregisters (while C8 does not for technical reasons), so + // reset R to USR, since we 
know how to handle multiple defs of USR, + // taking into account its subregisters. + if (R == Hexagon::C8) + R = Hexagon::USR; // Note register definitions, direct ones as well as indirect side-effects. // Super-registers are not tracked directly, but their components. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h index 5fc0bdeaccbb..33e22798c954 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -15,10 +15,9 @@ #ifndef HEXAGONMCCHECKER_H #define HEXAGONMCCHECKER_H -#include -#include -#include #include "MCTargetDesc/HexagonMCShuffler.h" +#include +#include using namespace llvm; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 4b07ca7490a8..39b828d8a03a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -88,6 +88,19 @@ void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS, return; } +static bool RegisterMatches(unsigned Consumer, unsigned Producer, + unsigned Producer2) { + if (Consumer == Producer) + return true; + if (Consumer == Producer2) + return true; + // Calculate if we're a single vector consumer referencing a double producer + if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15) + if (Consumer >= Hexagon::V0 && Consumer <= Hexagon::V31) + return ((Consumer - Hexagon::V0) >> 1) == (Producer - Hexagon::W0); + return false; +} + /// EncodeSingleInstruction - Emit a single void HexagonMCCodeEmitter::EncodeSingleInstruction( const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, @@ -125,8 +138,10 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction( MCOperand &MCO = HMB.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, HMB)); unsigned SOffset = 0; + unsigned VOffset = 0; unsigned Register = MCO.getReg(); unsigned 
Register1; + unsigned Register2; auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); auto i = Instructions.begin() + Index - 1; for (;; --i) { @@ -135,11 +150,18 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction( if (HexagonMCInstrInfo::isImmext(Inst)) continue; ++SOffset; + if (HexagonMCInstrInfo::isVector(MCII, Inst)) + // Vector instructions don't count scalars + ++VOffset; Register1 = HexagonMCInstrInfo::hasNewValue(MCII, Inst) ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg() : static_cast(Hexagon::NoRegister); - if (Register != Register1) + Register2 = + HexagonMCInstrInfo::hasNewValue2(MCII, Inst) + ? HexagonMCInstrInfo::getNewValueOperand2(MCII, Inst).getReg() + : static_cast(Hexagon::NoRegister); + if (!RegisterMatches(Register, Register1, Register2)) // This isn't the register we're looking for continue; if (!HexagonMCInstrInfo::isPredicated(MCII, Inst)) @@ -153,8 +175,11 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction( break; } // Hexagon PRM 10.11 Construct Nt from distance - unsigned Offset = SOffset; + unsigned Offset = + HexagonMCInstrInfo::isVector(MCII, HMB) ? VOffset : SOffset; Offset <<= 1; + Offset |= + HexagonMCInstrInfo::SubregisterBit(Register, Register1, Register2); MCO.setReg(Offset + Hexagon::R0); } @@ -165,7 +190,6 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction( ((HMB.getOpcode() != DuplexIClass0) && (HMB.getOpcode() != A4_ext) && (HMB.getOpcode() != A4_ext_b) && (HMB.getOpcode() != A4_ext_c) && (HMB.getOpcode() != A4_ext_g))) { - // Use a A2_nop for unimplemented instructions. 
DEBUG(dbgs() << "Unimplemented inst: " " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'" "\n"); @@ -251,7 +275,23 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction( ++MCNumEmitted; } -static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, +namespace { +void raise_relocation_error(unsigned bits, unsigned kind) { + std::string Text; + { + llvm::raw_string_ostream Stream(Text); + Stream << "Unrecognized relocation combination bits: " << bits + << " kind: " << kind; + } + report_fatal_error(Text); +} +} + +/// getFixupNoBits - Some insns are not extended and thus have no +/// bits. These cases require a more brute force method for determining +/// the correct relocation. +namespace { +Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, const MCOperand &MO, const MCSymbolRefExpr::VariantKind kind) { const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI); @@ -259,83 +299,90 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, if (insnType == HexagonII::TypePREFIX) { switch (kind) { - case llvm::MCSymbolRefExpr::VK_GOTOFF: + case MCSymbolRefExpr::VK_GOTREL: return Hexagon::fixup_Hexagon_GOTREL_32_6_X; - case llvm::MCSymbolRefExpr::VK_GOT: + case MCSymbolRefExpr::VK_GOT: return Hexagon::fixup_Hexagon_GOT_32_6_X; - case llvm::MCSymbolRefExpr::VK_TPREL: + case MCSymbolRefExpr::VK_TPREL: return Hexagon::fixup_Hexagon_TPREL_32_6_X; - case llvm::MCSymbolRefExpr::VK_DTPREL: + case MCSymbolRefExpr::VK_DTPREL: return Hexagon::fixup_Hexagon_DTPREL_32_6_X; - case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + case MCSymbolRefExpr::VK_Hexagon_GD_GOT: return Hexagon::fixup_Hexagon_GD_GOT_32_6_X; - case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + case MCSymbolRefExpr::VK_Hexagon_LD_GOT: return Hexagon::fixup_Hexagon_LD_GOT_32_6_X; - case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + case MCSymbolRefExpr::VK_Hexagon_IE: return Hexagon::fixup_Hexagon_IE_32_6_X; - case 
llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + case MCSymbolRefExpr::VK_Hexagon_IE_GOT: return Hexagon::fixup_Hexagon_IE_GOT_32_6_X; - default: + case MCSymbolRefExpr::VK_Hexagon_PCREL: + case MCSymbolRefExpr::VK_None: if (MCID.isBranch()) return Hexagon::fixup_Hexagon_B32_PCREL_X; else return Hexagon::fixup_Hexagon_32_6_X; + default: + raise_relocation_error(0, kind); } } else if (MCID.isBranch()) - return (Hexagon::fixup_Hexagon_B13_PCREL); + return Hexagon::fixup_Hexagon_B13_PCREL; switch (MCID.getOpcode()) { case Hexagon::HI: case Hexagon::A2_tfrih: switch (kind) { - case llvm::MCSymbolRefExpr::VK_GOT: + case MCSymbolRefExpr::VK_GOT: return Hexagon::fixup_Hexagon_GOT_HI16; - case llvm::MCSymbolRefExpr::VK_GOTOFF: + case MCSymbolRefExpr::VK_GOTREL: return Hexagon::fixup_Hexagon_GOTREL_HI16; - case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + case MCSymbolRefExpr::VK_Hexagon_GD_GOT: return Hexagon::fixup_Hexagon_GD_GOT_HI16; - case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + case MCSymbolRefExpr::VK_Hexagon_LD_GOT: return Hexagon::fixup_Hexagon_LD_GOT_HI16; - case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + case MCSymbolRefExpr::VK_Hexagon_IE: return Hexagon::fixup_Hexagon_IE_HI16; - case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + case MCSymbolRefExpr::VK_Hexagon_IE_GOT: return Hexagon::fixup_Hexagon_IE_GOT_HI16; - case llvm::MCSymbolRefExpr::VK_TPREL: + case MCSymbolRefExpr::VK_TPREL: return Hexagon::fixup_Hexagon_TPREL_HI16; - case llvm::MCSymbolRefExpr::VK_DTPREL: + case MCSymbolRefExpr::VK_DTPREL: return Hexagon::fixup_Hexagon_DTPREL_HI16; - default: + case MCSymbolRefExpr::VK_None: return Hexagon::fixup_Hexagon_HI16; + default: + raise_relocation_error(0, kind); } case Hexagon::LO: case Hexagon::A2_tfril: switch (kind) { - case llvm::MCSymbolRefExpr::VK_GOT: + case MCSymbolRefExpr::VK_GOT: return Hexagon::fixup_Hexagon_GOT_LO16; - case llvm::MCSymbolRefExpr::VK_GOTOFF: + case MCSymbolRefExpr::VK_GOTREL: return Hexagon::fixup_Hexagon_GOTREL_LO16; - case 
llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + case MCSymbolRefExpr::VK_Hexagon_GD_GOT: return Hexagon::fixup_Hexagon_GD_GOT_LO16; - case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + case MCSymbolRefExpr::VK_Hexagon_LD_GOT: return Hexagon::fixup_Hexagon_LD_GOT_LO16; - case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + case MCSymbolRefExpr::VK_Hexagon_IE: return Hexagon::fixup_Hexagon_IE_LO16; - case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + case MCSymbolRefExpr::VK_Hexagon_IE_GOT: return Hexagon::fixup_Hexagon_IE_GOT_LO16; - case llvm::MCSymbolRefExpr::VK_TPREL: + case MCSymbolRefExpr::VK_TPREL: return Hexagon::fixup_Hexagon_TPREL_LO16; - case llvm::MCSymbolRefExpr::VK_DTPREL: + case MCSymbolRefExpr::VK_DTPREL: return Hexagon::fixup_Hexagon_DTPREL_LO16; - default: + case MCSymbolRefExpr::VK_None: return Hexagon::fixup_Hexagon_LO16; + default: + raise_relocation_error(0, kind); } // The only relocs left should be GP relative: default: if (MCID.mayStore() || MCID.mayLoad()) { - for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); - ImpUses && *ImpUses; ++ImpUses) { + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; + ++ImpUses) { if (*ImpUses != Hexagon::GP) continue; switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) { @@ -348,14 +395,14 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, case HexagonII::MemAccessSize::DoubleWordAccess: return fixup_Hexagon_GPREL16_3; default: - llvm_unreachable("unhandled fixup"); + raise_relocation_error(0, kind); } } - } else - llvm_unreachable("unhandled fixup"); + } + raise_relocation_error(0, kind); } - - return LastTargetFixupKind; + llvm_unreachable("Relocation exit not taken"); +} } namespace llvm { @@ -395,23 +442,18 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCSubtargetInfo &STI) const { - int64_t Res; - - if (ME->evaluateAsAbsolute(Res)) - return Res; - - MCExpr::ExprKind MK = ME->getKind(); - if (MK == MCExpr::Constant) { - return 
cast(ME)->getValue(); - } - if (MK == MCExpr::Binary) { - getExprOpValue(MI, MO, cast(ME)->getLHS(), Fixups, STI); - getExprOpValue(MI, MO, cast(ME)->getRHS(), Fixups, STI); + if (isa(ME)) + ME = &HexagonMCInstrInfo::getExpr(*ME); + int64_t Value; + if (ME->evaluateAsAbsolute(Value)) + return Value; + assert(ME->getKind() == MCExpr::SymbolRef || ME->getKind() == MCExpr::Binary); + if (ME->getKind() == MCExpr::Binary) { + MCBinaryExpr const *Binary = cast(ME); + getExprOpValue(MI, MO, Binary->getLHS(), Fixups, STI); + getExprOpValue(MI, MO, Binary->getRHS(), Fixups, STI); return 0; } - - assert(MK == MCExpr::SymbolRef); - Hexagon::Fixups FixupKind = Hexagon::Fixups(Hexagon::fixup_Hexagon_TPREL_LO16); const MCSymbolRefExpr *MCSRE = static_cast(ME); @@ -430,275 +472,302 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, switch (bits) { default: - DEBUG(dbgs() << "unrecognized bit count of " << bits << '\n'); - break; - + raise_relocation_error(bits, kind); case 32: switch (kind) { - case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL: - FixupKind = Hexagon::fixup_Hexagon_32_PCREL; + case MCSymbolRefExpr::VK_DTPREL: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_DTPREL_32_6_X + : Hexagon::fixup_Hexagon_DTPREL_32; break; - case llvm::MCSymbolRefExpr::VK_GOT: + case MCSymbolRefExpr::VK_GOT: FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOT_32_6_X : Hexagon::fixup_Hexagon_GOT_32; break; - case llvm::MCSymbolRefExpr::VK_GOTOFF: + case MCSymbolRefExpr::VK_GOTREL: FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOTREL_32_6_X : Hexagon::fixup_Hexagon_GOTREL_32; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + case MCSymbolRefExpr::VK_Hexagon_GD_GOT: FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_GOT_32_6_X : Hexagon::fixup_Hexagon_GD_GOT_32; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: - FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_LD_GOT_32_6_X - : Hexagon::fixup_Hexagon_LD_GOT_32; - break; - case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + case MCSymbolRefExpr::VK_Hexagon_IE: FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_32_6_X : Hexagon::fixup_Hexagon_IE_32; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + case MCSymbolRefExpr::VK_Hexagon_IE_GOT: FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_GOT_32_6_X : Hexagon::fixup_Hexagon_IE_GOT_32; break; - case llvm::MCSymbolRefExpr::VK_TPREL: - FixupKind = *Extended ? Hexagon::fixup_Hexagon_TPREL_32_6_X - : Hexagon::fixup_Hexagon_TPREL_32; + case MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_GOT_32_6_X + : Hexagon::fixup_Hexagon_LD_GOT_32; break; - case llvm::MCSymbolRefExpr::VK_DTPREL: - FixupKind = *Extended ? Hexagon::fixup_Hexagon_DTPREL_32_6_X - : Hexagon::fixup_Hexagon_DTPREL_32; + case MCSymbolRefExpr::VK_Hexagon_PCREL: + FixupKind = Hexagon::fixup_Hexagon_32_PCREL; break; - default: + case MCSymbolRefExpr::VK_None: FixupKind = *Extended ? Hexagon::fixup_Hexagon_32_6_X : Hexagon::fixup_Hexagon_32; break; + case MCSymbolRefExpr::VK_TPREL: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_TPREL_32_6_X + : Hexagon::fixup_Hexagon_TPREL_32; + break; + default: + raise_relocation_error(bits, kind); } break; case 22: switch (kind) { - case llvm::MCSymbolRefExpr::VK_Hexagon_GD_PLT: + case MCSymbolRefExpr::VK_Hexagon_GD_PLT: FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_LD_PLT: + case MCSymbolRefExpr::VK_Hexagon_LD_PLT: FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL; break; - default: - if (MCID.isBranch() || MCID.isCall()) { - FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_B22_PCREL_X - : Hexagon::fixup_Hexagon_B22_PCREL; - } else { - errs() << "unrecognized relocation, bits: " << bits << "\n"; - errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; - } + case MCSymbolRefExpr::VK_None: + FixupKind = *Extended ? Hexagon::fixup_Hexagon_B22_PCREL_X + : Hexagon::fixup_Hexagon_B22_PCREL; + break; + case MCSymbolRefExpr::VK_PLT: + FixupKind = Hexagon::fixup_Hexagon_PLT_B22_PCREL; break; + default: + raise_relocation_error(bits, kind); } break; case 16: if (*Extended) { switch (kind) { - default: - FixupKind = Hexagon::fixup_Hexagon_16_X; + case MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X; break; - case llvm::MCSymbolRefExpr::VK_GOT: + case MCSymbolRefExpr::VK_GOT: FixupKind = Hexagon::fixup_Hexagon_GOT_16_X; break; - case llvm::MCSymbolRefExpr::VK_GOTOFF: + case MCSymbolRefExpr::VK_GOTREL: FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + case MCSymbolRefExpr::VK_Hexagon_GD_GOT: FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16_X; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: - FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16_X; - break; - case llvm::MCSymbolRefExpr::VK_Hexagon_IE: + case MCSymbolRefExpr::VK_Hexagon_IE: FixupKind = Hexagon::fixup_Hexagon_IE_16_X; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: + case MCSymbolRefExpr::VK_Hexagon_IE_GOT: FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16_X; break; - case llvm::MCSymbolRefExpr::VK_TPREL: - FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X; + case MCSymbolRefExpr::VK_Hexagon_LD_GOT: + FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16_X; break; - case llvm::MCSymbolRefExpr::VK_DTPREL: - FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X; + case MCSymbolRefExpr::VK_None: + FixupKind = Hexagon::fixup_Hexagon_16_X; + break; + case MCSymbolRefExpr::VK_TPREL: + FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X; break; + default: + raise_relocation_error(bits, kind); } } 
else switch (kind) { - default: - errs() << "unrecognized relocation, bits " << bits << "\n"; - errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + case MCSymbolRefExpr::VK_None: { + if (HexagonMCInstrInfo::s23_2_reloc(*MO.getExpr())) + FixupKind = Hexagon::fixup_Hexagon_23_REG; + else + raise_relocation_error(bits, kind); + break; + } + case MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_16; break; - case llvm::MCSymbolRefExpr::VK_GOTOFF: - if ((MCID.getOpcode() == Hexagon::HI) || - (MCID.getOpcode() == Hexagon::LO_H)) + case MCSymbolRefExpr::VK_GOTREL: + if (MCID.getOpcode() == Hexagon::HI) FixupKind = Hexagon::fixup_Hexagon_GOTREL_HI16; else FixupKind = Hexagon::fixup_Hexagon_GOTREL_LO16; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_GPREL: - FixupKind = Hexagon::fixup_Hexagon_GPREL16_0; + case MCSymbolRefExpr::VK_Hexagon_GD_GOT: + FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_LO16: - FixupKind = Hexagon::fixup_Hexagon_LO16; + case MCSymbolRefExpr::VK_Hexagon_GPREL: + FixupKind = Hexagon::fixup_Hexagon_GPREL16_0; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_HI16: + case MCSymbolRefExpr::VK_Hexagon_HI16: FixupKind = Hexagon::fixup_Hexagon_HI16; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: - FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16; + case MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + case MCSymbolRefExpr::VK_Hexagon_LD_GOT: FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: - FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16; + case MCSymbolRefExpr::VK_Hexagon_LO16: + FixupKind = Hexagon::fixup_Hexagon_LO16; break; - case llvm::MCSymbolRefExpr::VK_TPREL: + case MCSymbolRefExpr::VK_TPREL: FixupKind = Hexagon::fixup_Hexagon_TPREL_16; break; - case llvm::MCSymbolRefExpr::VK_DTPREL: - FixupKind = 
Hexagon::fixup_Hexagon_DTPREL_16; - break; + default: + raise_relocation_error(bits, kind); } break; case 15: - if (MCID.isBranch() || MCID.isCall()) + switch (kind) { + case MCSymbolRefExpr::VK_None: FixupKind = *Extended ? Hexagon::fixup_Hexagon_B15_PCREL_X : Hexagon::fixup_Hexagon_B15_PCREL; + break; + default: + raise_relocation_error(bits, kind); + } break; case 13: - if (MCID.isBranch()) + switch (kind) { + case MCSymbolRefExpr::VK_None: FixupKind = Hexagon::fixup_Hexagon_B13_PCREL; - else { - errs() << "unrecognized relocation, bits " << bits << "\n"; - errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + break; + default: + raise_relocation_error(bits, kind); } break; case 12: if (*Extended) switch (kind) { - default: - FixupKind = Hexagon::fixup_Hexagon_12_X; - break; // There isn't a GOT_12_X, both 11_X and 16_X resolve to 6/26 - case llvm::MCSymbolRefExpr::VK_GOT: + case MCSymbolRefExpr::VK_GOT: FixupKind = Hexagon::fixup_Hexagon_GOT_16_X; break; - case llvm::MCSymbolRefExpr::VK_GOTOFF: + case MCSymbolRefExpr::VK_GOTREL: FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X; break; + case MCSymbolRefExpr::VK_None: + FixupKind = Hexagon::fixup_Hexagon_12_X; + break; + default: + raise_relocation_error(bits, kind); } - else { - errs() << "unrecognized relocation, bits " << bits << "\n"; - errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; - } + else + raise_relocation_error(bits, kind); break; case 11: if (*Extended) switch (kind) { - default: - FixupKind = Hexagon::fixup_Hexagon_11_X; + case MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_11_X; break; - case llvm::MCSymbolRefExpr::VK_GOT: + case MCSymbolRefExpr::VK_GOT: FixupKind = Hexagon::fixup_Hexagon_GOT_11_X; break; - case llvm::MCSymbolRefExpr::VK_GOTOFF: + case MCSymbolRefExpr::VK_GOTREL: FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_GD_GOT: + case MCSymbolRefExpr::VK_Hexagon_GD_GOT: FixupKind = 
Hexagon::fixup_Hexagon_GD_GOT_11_X; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_LD_GOT: + case MCSymbolRefExpr::VK_Hexagon_IE_GOT: + FixupKind = Hexagon::fixup_Hexagon_IE_GOT_11_X; + break; + case MCSymbolRefExpr::VK_Hexagon_LD_GOT: FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X; break; - case llvm::MCSymbolRefExpr::VK_Hexagon_IE_GOT: - FixupKind = Hexagon::fixup_Hexagon_IE_GOT_11_X; + case MCSymbolRefExpr::VK_None: + FixupKind = Hexagon::fixup_Hexagon_11_X; break; - case llvm::MCSymbolRefExpr::VK_TPREL: + case MCSymbolRefExpr::VK_TPREL: FixupKind = Hexagon::fixup_Hexagon_TPREL_11_X; break; - case llvm::MCSymbolRefExpr::VK_DTPREL: - FixupKind = Hexagon::fixup_Hexagon_DTPREL_11_X; - break; + default: + raise_relocation_error(bits, kind); } else { - errs() << "unrecognized relocation, bits " << bits << "\n"; - errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; + switch (kind) { + case MCSymbolRefExpr::VK_TPREL: + FixupKind = Hexagon::fixup_Hexagon_TPREL_11_X; + break; + default: + raise_relocation_error(bits, kind); + } } break; case 10: - if (*Extended) - FixupKind = Hexagon::fixup_Hexagon_10_X; + if (*Extended) { + switch (kind) { + case MCSymbolRefExpr::VK_None: + FixupKind = Hexagon::fixup_Hexagon_10_X; + break; + default: + raise_relocation_error(bits, kind); + } + } else + raise_relocation_error(bits, kind); break; case 9: if (MCID.isBranch() || - (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR)) + (HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR)) FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_B9_PCREL_X : Hexagon::fixup_Hexagon_B9_PCREL; else if (*Extended) FixupKind = Hexagon::fixup_Hexagon_9_X; - else { - errs() << "unrecognized relocation, bits " << bits << "\n"; - errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; - } + else + raise_relocation_error(bits, kind); break; case 8: if (*Extended) FixupKind = Hexagon::fixup_Hexagon_8_X; - else { - errs() << "unrecognized relocation, bits " << bits << "\n"; - errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; - } + else + raise_relocation_error(bits, kind); break; case 7: if (MCID.isBranch() || - (llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR)) + (HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR)) FixupKind = *Extended ? Hexagon::fixup_Hexagon_B7_PCREL_X : Hexagon::fixup_Hexagon_B7_PCREL; else if (*Extended) FixupKind = Hexagon::fixup_Hexagon_7_X; - else { - errs() << "unrecognized relocation, bits " << bits << "\n"; - errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; - } + else + raise_relocation_error(bits, kind); break; case 6: if (*Extended) { switch (kind) { - default: - FixupKind = Hexagon::fixup_Hexagon_6_X; - break; - case llvm::MCSymbolRefExpr::VK_Hexagon_PCREL: - FixupKind = Hexagon::fixup_Hexagon_6_PCREL_X; + case MCSymbolRefExpr::VK_DTPREL: + FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X; break; // This is part of an extender, GOT_11 is a // Word32_U6 unsigned/truncated reloc. 
- case llvm::MCSymbolRefExpr::VK_GOT: + case MCSymbolRefExpr::VK_GOT: FixupKind = Hexagon::fixup_Hexagon_GOT_11_X; break; - case llvm::MCSymbolRefExpr::VK_GOTOFF: + case MCSymbolRefExpr::VK_GOTREL: FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X; break; + case MCSymbolRefExpr::VK_Hexagon_PCREL: + FixupKind = Hexagon::fixup_Hexagon_6_PCREL_X; + break; + case MCSymbolRefExpr::VK_TPREL: + FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X; + break; + case MCSymbolRefExpr::VK_None: + FixupKind = Hexagon::fixup_Hexagon_6_X; + break; + default: + raise_relocation_error(bits, kind); } - } else { - errs() << "unrecognized relocation, bits " << bits << "\n"; - errs() << "name = " << HexagonMCInstrInfo::getName(MCII, MI) << "\n"; - } + } else + raise_relocation_error(bits, kind); break; case 0: @@ -706,29 +775,39 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, break; } - MCExpr const *FixupExpression = (*Addend > 0 && isPCRel(FixupKind)) ? - MCBinaryExpr::createAdd(MO.getExpr(), - MCConstantExpr::create(*Addend, MCT), MCT) : - MO.getExpr(); + MCExpr const *FixupExpression = + (*Addend > 0 && isPCRel(FixupKind)) + ? MCBinaryExpr::createAdd(MO.getExpr(), + MCConstantExpr::create(*Addend, MCT), MCT) + : MO.getExpr(); - MCFixup fixup = MCFixup::create(*Addend, FixupExpression, + MCFixup fixup = MCFixup::create(*Addend, FixupExpression, MCFixupKind(FixupKind), MI.getLoc()); Fixups.push_back(fixup); // All of the information is in the fixup. 
- return (0); + return 0; } unsigned HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO, SmallVectorImpl &Fixups, MCSubtargetInfo const &STI) const { - if (MO.isReg()) - return MCT.getRegisterInfo()->getEncodingValue(MO.getReg()); - if (MO.isImm()) - return static_cast(MO.getImm()); + assert(!MO.isImm()); + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + if (HexagonMCInstrInfo::isSubInstruction(MI)) + return HexagonMCInstrInfo::getDuplexRegisterNumbering(Reg); + switch(MI.getOpcode()){ + case Hexagon::A2_tfrrcr: + case Hexagon::A2_tfrcrr: + if(Reg == Hexagon::M0) + Reg = Hexagon::C6; + if(Reg == Hexagon::M1) + Reg = Hexagon::C7; + } + return MCT.getRegisterInfo()->getEncodingValue(Reg); + } - // MO must be an ME. - assert(MO.isExpr()); return getExprOpValue(MI, MO, MO.getExpr(), Fixups, STI); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp index e6194f61a6ba..88336217cc8d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -80,9 +80,6 @@ static const std::pair opcodeData[] = { std::make_pair((unsigned)V4_SS2_storewi0, 4096), std::make_pair((unsigned)V4_SS2_storewi1, 4352)}; -static std::map - subinstOpcodeMap(std::begin(opcodeData), std::end(opcodeData)); - bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) { switch (Ga) { case HexagonII::HSIG_None: @@ -587,6 +584,9 @@ bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII, unsigned MIaG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIa), MIbG = HexagonMCInstrInfo::getDuplexCandidateGroup(MIb); + static std::map subinstOpcodeMap(std::begin(opcodeData), + std::end(opcodeData)); + // If a duplex contains 2 insns in the same group, the insns must be // ordered such that the numerically smaller opcode is in slot 1. 
if ((MIaG != HexagonII::HSIG_None) && (MIaG == MIbG) && bisReversable) { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index eaa3550d07f6..67dcb8fea739 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -107,15 +107,20 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol, ((AccessSize == 0) || (Size == 0) || (Size > GPSize)) ? ".bss" : sbss[(Log2_64(AccessSize))]; - - MCSection *CrntSection = getCurrentSection().first; - MCSection *Section = getAssembler().getContext().getELFSection( + MCSection &Section = *getAssembler().getContext().getELFSection( SectionName, ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); - SwitchSection(Section); - AssignFragment(Symbol, getCurrentFragment()); + MCSectionSubPair P = getCurrentSection(); + SwitchSection(&Section); + + EmitValueToAlignment(ByteAlignment, 0, 1, 0); + EmitLabel(Symbol); + EmitZeros(Size); + + // Update the maximum alignment of the section if necessary. 
+ if (ByteAlignment > Section.getAlignment()) + Section.setAlignment(ByteAlignment); - MCELFStreamer::EmitCommonSymbol(Symbol, Size, ByteAlignment); - SwitchSection(CrntSection); + SwitchSection(P.first, P.second); } else { if (ELFSymbol->declareCommon(Size, ByteAlignment)) report_fatal_error("Symbol: " + Symbol->getName() + diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp index fc6262657514..e93906a0a396 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp @@ -10,6 +10,7 @@ #include "HexagonMCExpr.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/raw_ostream.h" @@ -17,33 +18,61 @@ using namespace llvm; #define DEBUG_TYPE "hexagon-mcexpr" -HexagonNoExtendOperand *HexagonNoExtendOperand::Create(MCExpr const *Expr, - MCContext &Ctx) { - return new (Ctx) HexagonNoExtendOperand(Expr); +HexagonMCExpr *HexagonMCExpr::create(MCExpr const *Expr, MCContext &Ctx) { + return new (Ctx) HexagonMCExpr(Expr); } -bool HexagonNoExtendOperand::evaluateAsRelocatableImpl( - MCValue &Res, MCAsmLayout const *Layout, MCFixup const *Fixup) const { +bool HexagonMCExpr::evaluateAsRelocatableImpl(MCValue &Res, + MCAsmLayout const *Layout, + MCFixup const *Fixup) const { return Expr->evaluateAsRelocatable(Res, Layout, Fixup); } -void HexagonNoExtendOperand::visitUsedExpr(MCStreamer &Streamer) const {} +void HexagonMCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*Expr); +} -MCFragment *llvm::HexagonNoExtendOperand::findAssociatedFragment() const { +MCFragment *llvm::HexagonMCExpr::findAssociatedFragment() const { return Expr->findAssociatedFragment(); } -void HexagonNoExtendOperand::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} +void HexagonMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + +MCExpr const *HexagonMCExpr::getExpr() const { return Expr; 
} + +void HexagonMCExpr::setMustExtend(bool Val) { + assert((!Val || !MustNotExtend) && "Extension contradiction"); + MustExtend = Val; +} + +bool HexagonMCExpr::mustExtend() const { return MustExtend; } +void HexagonMCExpr::setMustNotExtend(bool Val) { + assert((!Val || !MustExtend) && "Extension contradiction"); + MustNotExtend = Val; +} +bool HexagonMCExpr::mustNotExtend() const { return MustNotExtend; } -MCExpr const *HexagonNoExtendOperand::getExpr() const { return Expr; } +bool HexagonMCExpr::s23_2_reloc() const { return S23_2_reloc; } +void HexagonMCExpr::setS23_2_reloc(bool Val) { + S23_2_reloc = Val; +} -bool HexagonNoExtendOperand::classof(MCExpr const *E) { +bool HexagonMCExpr::classof(MCExpr const *E) { return E->getKind() == MCExpr::Target; } -HexagonNoExtendOperand::HexagonNoExtendOperand(MCExpr const *Expr) - : Expr(Expr) {} +HexagonMCExpr::HexagonMCExpr(MCExpr const *Expr) + : Expr(Expr), MustNotExtend(false), MustExtend(false), S23_2_reloc(false), + SignMismatch(false) {} -void HexagonNoExtendOperand::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { +void HexagonMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { Expr->print(OS, MAI); } + +void HexagonMCExpr::setSignMismatch(bool Val) { + SignMismatch = Val; +} + +bool HexagonMCExpr::signMismatch() const { + return SignMismatch; +} \ No newline at end of file diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h index 60f180fb2bc4..bca40cfaf6f4 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h @@ -14,9 +14,9 @@ namespace llvm { class MCInst; -class HexagonNoExtendOperand : public MCTargetExpr { +class HexagonMCExpr : public MCTargetExpr { public: - static HexagonNoExtendOperand *Create(MCExpr const *Expr, MCContext &Ctx); + static HexagonMCExpr *create(MCExpr const *Expr, MCContext &Ctx); void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; 
bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override; @@ -25,10 +25,22 @@ public: void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; static bool classof(MCExpr const *E); MCExpr const *getExpr() const; + void setMustExtend(bool Val = true); + bool mustExtend() const; + void setMustNotExtend(bool Val = true); + bool mustNotExtend() const; + void setS23_2_reloc(bool Val = true); + bool s23_2_reloc() const; + void setSignMismatch(bool Val = true); + bool signMismatch() const; private: - HexagonNoExtendOperand(MCExpr const *Expr); + HexagonMCExpr(MCExpr const *Expr); MCExpr const *Expr; + bool MustNotExtend; + bool MustExtend; + bool S23_2_reloc; + bool SignMismatch; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index e6842076db2a..941cbd6dc35d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -99,7 +99,8 @@ void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, int64_t Value; if (exOp.getExpr()->evaluateAsAbsolute(Value)) { unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MCI); - exOp.setExpr(MCConstantExpr::create((Value & 0x3f) << Shift, Context)); + exOp.setExpr(HexagonMCExpr::create( + MCConstantExpr::create((Value & 0x3f) << Shift, Context), Context)); } } @@ -159,8 +160,8 @@ MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB, void HexagonMCInstrInfo::extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB, - MCInst const &MCI, bool MustExtend) { - if (isConstExtended(MCII, MCI) || MustExtend) + MCInst const &MCI) { + if (isConstExtended(MCII, MCI)) addConstExtender(Context, MCII, MCB, MCI); } @@ -190,6 +191,61 @@ MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII, return (MCII.get(MCI.getOpcode())); } +unsigned 
HexagonMCInstrInfo::getDuplexRegisterNumbering(unsigned Reg) { + using namespace Hexagon; + switch (Reg) { + default: + llvm_unreachable("unknown duplex register"); + // Rs Rss + case R0: + case D0: + return 0; + case R1: + case D1: + return 1; + case R2: + case D2: + return 2; + case R3: + case D3: + return 3; + case R4: + case D8: + return 4; + case R5: + case D9: + return 5; + case R6: + case D10: + return 6; + case R7: + case D11: + return 7; + case R16: + return 8; + case R17: + return 9; + case R18: + return 10; + case R19: + return 11; + case R20: + return 12; + case R21: + return 13; + case R22: + return 14; + case R23: + return 15; + } +} + +MCExpr const &HexagonMCInstrInfo::getExpr(MCExpr const &Expr) { + const auto &HExpr = cast(Expr); + assert(HExpr.getExpr()); + return *HExpr.getExpr(); +} + unsigned short HexagonMCInstrInfo::getExtendableOp(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; @@ -401,6 +457,12 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI) { if (HexagonMCInstrInfo::isExtended(MCII, MCI)) return true; + if (!HexagonMCInstrInfo::isExtendable(MCII, MCI)) + return false; + MCOperand const &MO = HexagonMCInstrInfo::getExtendableOperand(MCII, MCI); + if (isa(MO.getExpr()) && + HexagonMCInstrInfo::mustExtend(*MO.getExpr())) + return true; // Branch insns are handled as necessary by relaxation. 
if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeJ) || (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCOMPOUND && @@ -412,18 +474,11 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, else if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) && (MCI.getOpcode() != Hexagon::C4_addipc)) return false; - else if (!HexagonMCInstrInfo::isExtendable(MCII, MCI)) - return false; - MCOperand const &MO = HexagonMCInstrInfo::getExtendableOperand(MCII, MCI); - - // We could be using an instruction with an extendable immediate and shoehorn - // a global address into it. If it is a global address it will be constant - // extended. We do this for COMBINE. - // We currently only handle isGlobal() because it is the only kind of - // object we are going to end up with here for now. - // In the future we probably should add isSymbol(), etc. assert(!MO.isImm()); + if (isa(MO.getExpr()) && + HexagonMCInstrInfo::mustNotExtend(*MO.getExpr())) + return false; int64_t Value; if (!MO.getExpr()->evaluateAsAbsolute(Value)) return true; @@ -543,6 +598,66 @@ bool HexagonMCInstrInfo::isMemStoreReorderEnabled(MCInst const &MCI) { return (Flags & memStoreReorderEnabledMask) != 0; } +bool HexagonMCInstrInfo::isSubInstruction(MCInst const &MCI) { + switch (MCI.getOpcode()) { + default: + return false; + case Hexagon::V4_SA1_addi: + case Hexagon::V4_SA1_addrx: + case Hexagon::V4_SA1_addsp: + case Hexagon::V4_SA1_and1: + case Hexagon::V4_SA1_clrf: + case Hexagon::V4_SA1_clrfnew: + case Hexagon::V4_SA1_clrt: + case Hexagon::V4_SA1_clrtnew: + case Hexagon::V4_SA1_cmpeqi: + case Hexagon::V4_SA1_combine0i: + case Hexagon::V4_SA1_combine1i: + case Hexagon::V4_SA1_combine2i: + case Hexagon::V4_SA1_combine3i: + case Hexagon::V4_SA1_combinerz: + case Hexagon::V4_SA1_combinezr: + case Hexagon::V4_SA1_dec: + case Hexagon::V4_SA1_inc: + case Hexagon::V4_SA1_seti: + case Hexagon::V4_SA1_setin1: + case Hexagon::V4_SA1_sxtb: + case Hexagon::V4_SA1_sxth: + case 
Hexagon::V4_SA1_tfr: + case Hexagon::V4_SA1_zxtb: + case Hexagon::V4_SA1_zxth: + case Hexagon::V4_SL1_loadri_io: + case Hexagon::V4_SL1_loadrub_io: + case Hexagon::V4_SL2_deallocframe: + case Hexagon::V4_SL2_jumpr31: + case Hexagon::V4_SL2_jumpr31_f: + case Hexagon::V4_SL2_jumpr31_fnew: + case Hexagon::V4_SL2_jumpr31_t: + case Hexagon::V4_SL2_jumpr31_tnew: + case Hexagon::V4_SL2_loadrb_io: + case Hexagon::V4_SL2_loadrd_sp: + case Hexagon::V4_SL2_loadrh_io: + case Hexagon::V4_SL2_loadri_sp: + case Hexagon::V4_SL2_loadruh_io: + case Hexagon::V4_SL2_return: + case Hexagon::V4_SL2_return_f: + case Hexagon::V4_SL2_return_fnew: + case Hexagon::V4_SL2_return_t: + case Hexagon::V4_SL2_return_tnew: + case Hexagon::V4_SS1_storeb_io: + case Hexagon::V4_SS1_storew_io: + case Hexagon::V4_SS2_allocframe: + case Hexagon::V4_SS2_storebi0: + case Hexagon::V4_SS2_storebi1: + case Hexagon::V4_SS2_stored_sp: + case Hexagon::V4_SS2_storeh_io: + case Hexagon::V4_SS2_storew_sp: + case Hexagon::V4_SS2_storewi0: + case Hexagon::V4_SS2_storewi1: + return true; + } +} + bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask); @@ -575,6 +690,25 @@ int64_t HexagonMCInstrInfo::minConstant(MCInst const &MCI, size_t Index) { return Value; } +void HexagonMCInstrInfo::setMustExtend(MCExpr const &Expr, bool Val) { + HexagonMCExpr &HExpr = const_cast(cast(Expr)); + HExpr.setMustExtend(Val); +} + +bool HexagonMCInstrInfo::mustExtend(MCExpr const &Expr) { + HexagonMCExpr const &HExpr = cast(Expr); + return HExpr.mustExtend(); +} +void HexagonMCInstrInfo::setMustNotExtend(MCExpr const &Expr, bool Val) { + HexagonMCExpr &HExpr = + const_cast(cast(Expr)); + HExpr.setMustNotExtend(Val); +} +bool HexagonMCInstrInfo::mustNotExtend(MCExpr const &Expr) { + HexagonMCExpr const &HExpr = cast(Expr); + return HExpr.mustNotExtend(); +} + void 
HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) { MCInst Nop; Nop.setOpcode(Hexagon::A2_nop); @@ -639,10 +773,32 @@ void HexagonMCInstrInfo::setMemStoreReorderEnabled(MCInst &MCI) { Operand.setImm(Operand.getImm() | memStoreReorderEnabledMask); assert(isMemStoreReorderEnabled(MCI)); } +void HexagonMCInstrInfo::setS23_2_reloc(MCExpr const &Expr, bool Val) { + HexagonMCExpr &HExpr = + const_cast(*llvm::cast(&Expr)); + HExpr.setS23_2_reloc(Val); +} +bool HexagonMCInstrInfo::s23_2_reloc(MCExpr const &Expr) { + HexagonMCExpr const &HExpr = *llvm::cast(&Expr); + return HExpr.s23_2_reloc(); +} void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) { assert(isBundle(MCI)); MCOperand &Operand = MCI.getOperand(0); Operand.setImm(Operand.getImm() | outerLoopMask); } + +unsigned HexagonMCInstrInfo::SubregisterBit(unsigned Consumer, + unsigned Producer, + unsigned Producer2) { + // If we're a single vector consumer of a double producer, set subreg bit + // based on if we're accessing the lower or upper register component + if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15) + if (Consumer >= Hexagon::V0 && Consumer <= Hexagon::V31) + return (Consumer - Hexagon::V0) & 0x1; + if (Consumer == Producer2) + return 0x1; + return 0; +} } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index 0237b2884a3b..58a8f68b9847 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -75,7 +75,7 @@ MCInst createBundle(); // Return the extender for instruction at Index or nullptr if none MCInst const *extenderForIndex(MCInst const &MCB, size_t Index); void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB, - MCInst const &MCI, bool MustExtend); + MCInst const &MCI); // Create a duplex instruction given the two subinsts MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0, @@ -107,6 +107,9 @@ 
unsigned getDuplexCandidateGroup(MCInst const &MI); // Return a list of all possible instruction duplex combinations SmallVector getDuplexPossibilties(MCInstrInfo const &MCII, MCInst const &MCB); +unsigned getDuplexRegisterNumbering(unsigned Reg); + +MCExpr const &getExpr(MCExpr const &Expr); // Return the index of the extendable operand unsigned short getExtendableOp(MCInstrInfo const &MCII, MCInst const &MCI); @@ -260,7 +263,10 @@ bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI); /// Return whether the insn can be packaged only with an A-type insn in slot #1. bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI); +bool isSubInstruction(MCInst const &MCI); bool isVector(MCInstrInfo const &MCII, MCInst const &MCI); +bool mustExtend(MCExpr const &Expr); +bool mustNotExtend(MCExpr const &Expr); // Pad the bundle with nops to satisfy endloop requirements void padEndloop(MCContext &Context, MCInst &MCI); @@ -270,16 +276,22 @@ bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI); // Replace the instructions inside MCB, represented by Candidate void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate); +bool s23_2_reloc(MCExpr const &Expr); // Marks a bundle as endloop0 void setInnerLoop(MCInst &MCI); void setMemReorderDisabled(MCInst &MCI); void setMemStoreReorderEnabled(MCInst &MCI); +void setMustExtend(MCExpr const &Expr, bool Val = true); +void setMustNotExtend(MCExpr const &Expr, bool Val = true); +void setS23_2_reloc(MCExpr const &Expr, bool Val = true); // Marks a bundle as endloop1 void setOuterLoop(MCInst &MCI); // Would duplexing this instruction create a requirement to extend bool subInstWouldBeExtended(MCInst const &potentialDuplex); +unsigned SubregisterBit(unsigned Consumer, unsigned Producer, + unsigned Producer2); // Attempt to find and replace compound pairs void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp 
b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp index 8e70280c1a0d..7f8e7a4edb0c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -180,7 +180,6 @@ llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, if (MCS.size() == 1) { // case of one duplex // copy the created duplex in the shuffler to the bundle MCS.copyTo(MCB); - doneShuffling = true; return HexagonShuffler::SHUFFLE_SUCCESS; } // try shuffle with this duplex diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 9a292577a8f3..35a1a23a8892 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -16,7 +16,6 @@ #include "HexagonMCAsmInfo.h" #include "HexagonMCELFStreamer.h" #include "MCTargetDesc/HexagonInstPrinter.h" -#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInstrInfo.h" @@ -48,10 +47,46 @@ cl::opt llvm::HexagonDisableDuplex ("mno-pairing", cl::desc("Disable looking for duplex instructions for Hexagon")); +static cl::opt HexagonV4ArchVariant("mv4", cl::Hidden, cl::init(false), + cl::desc("Build for Hexagon V4")); + +static cl::opt HexagonV5ArchVariant("mv5", cl::Hidden, cl::init(false), + cl::desc("Build for Hexagon V5")); + +static cl::opt HexagonV55ArchVariant("mv55", cl::Hidden, cl::init(false), + cl::desc("Build for Hexagon V55")); + +static cl::opt HexagonV60ArchVariant("mv60", cl::Hidden, cl::init(false), + cl::desc("Build for Hexagon V60")); + + +static StringRef DefaultArch = "hexagonv60"; + +static StringRef HexagonGetArchVariant() { + if (HexagonV4ArchVariant) + return "hexagonv4"; + if (HexagonV5ArchVariant) + return "hexagonv5"; + if (HexagonV55ArchVariant) + return "hexagonv55"; + if (HexagonV60ArchVariant) + return "hexagonv60"; + return ""; +} + StringRef 
HEXAGON_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) { - if (CPU.empty()) - CPU = "hexagonv60"; - return CPU; + StringRef ArchV = HexagonGetArchVariant(); + if (!ArchV.empty() && !CPU.empty()) { + if (ArchV != CPU) + report_fatal_error("conflicting architectures specified."); + return CPU; + } + if (ArchV.empty()) { + if (CPU.empty()) + CPU = DefaultArch; + return CPU; + } + return ArchV; } MCInstrInfo *llvm::createHexagonMCInstrInfo() { @@ -62,7 +97,7 @@ MCInstrInfo *llvm::createHexagonMCInstrInfo() { static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); - InitHexagonMCRegisterInfo(X, Hexagon::R0); + InitHexagonMCRegisterInfo(X, Hexagon::R31); return X; } @@ -121,10 +156,14 @@ public: HexagonTargetELFStreamer(MCStreamer &S, MCSubtargetInfo const &STI) : HexagonTargetStreamer(S) { auto Bits = STI.getFeatureBits(); - unsigned Flags; - if (Bits.to_ullong() & llvm::Hexagon::ArchV5) + unsigned Flags = 0; + if (Bits[Hexagon::ArchV60]) + Flags = ELF::EF_HEXAGON_MACH_V60; + else if (Bits[Hexagon::ArchV55]) + Flags = ELF::EF_HEXAGON_MACH_V55; + else if (Bits[Hexagon::ArchV5]) Flags = ELF::EF_HEXAGON_MACH_V5; - else + else if (Bits[Hexagon::ArchV4]) Flags = ELF::EF_HEXAGON_MACH_V4; getStreamer().getAssembler().setELFHeaderEFlags(Flags); } @@ -159,17 +198,6 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createHexagonMCCodeGenInfo(const Triple &TT, - Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - if (RM == Reloc::Default) - RM = Reloc::Static; - X->initMCCodeGenInfo(RM, CM, OL); - return X; -} - static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, @@ -204,10 +232,6 @@ extern "C" void LLVMInitializeHexagonTargetMC() { // Register the MC asm info. 
RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo); - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget, - createHexagonMCCodeGenInfo); - // Register the MC instruction info. TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget, createHexagonMCInstrInfo); diff --git a/lib/Target/Hexagon/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile deleted file mode 100644 index 885be2ddbd88..000000000000 --- a/lib/Target/Hexagon/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Hexagon/TargetDesc/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMHexagonDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile deleted file mode 100644 index c53b8e56aafc..000000000000 --- a/lib/Target/Hexagon/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -##===- lib/Target/Hexagon/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../.. -LIBRARYNAME = LLVMHexagonCodeGen -TARGET = Hexagon - -# Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = HexagonGenRegisterInfo.inc \ - HexagonGenInstrInfo.inc \ - HexagonGenAsmMatcher.inc \ - HexagonGenAsmWriter.inc \ - HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \ - HexagonGenCallingConv.inc \ - HexagonGenDFAPacketizer.inc \ - HexagonGenMCCodeEmitter.inc \ - HexagonGenDisassemblerTables.inc - -DIRS = TargetInfo MCTargetDesc Disassembler AsmParser - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp index c547c7195075..61a83dada218 100644 --- a/lib/Target/Hexagon/RDFCopy.cpp +++ b/lib/Target/Hexagon/RDFCopy.cpp @@ -7,37 +7,85 @@ // //===----------------------------------------------------------------------===// // -// Simplistic RDF-based copy propagation. +// RDF-based copy propagation. #include "RDFCopy.h" #include "RDFGraph.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" - -#include +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; +using namespace rdf; #ifndef NDEBUG static cl::opt CpLimit("rdf-cp-limit", cl::init(0), cl::Hidden); static unsigned CpCount = 0; #endif -using namespace llvm; -using namespace rdf; +bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::COPY: { + const MachineOperand &Dst = MI->getOperand(0); + const MachineOperand &Src = MI->getOperand(1); + RegisterRef DstR = { Dst.getReg(), Dst.getSubReg() }; + RegisterRef SrcR = { Src.getReg(), Src.getSubReg() }; + if (TargetRegisterInfo::isVirtualRegister(DstR.Reg)) { + if (!TargetRegisterInfo::isVirtualRegister(SrcR.Reg)) + return false; + MachineRegisterInfo &MRI = DFG.getMF().getRegInfo(); + if (MRI.getRegClass(DstR.Reg) != MRI.getRegClass(SrcR.Reg)) + return false; + } else if 
(TargetRegisterInfo::isPhysicalRegister(DstR.Reg)) { + if (!TargetRegisterInfo::isPhysicalRegister(SrcR.Reg)) + return false; + const TargetRegisterInfo &TRI = DFG.getTRI(); + if (TRI.getMinimalPhysRegClass(DstR.Reg) != + TRI.getMinimalPhysRegClass(SrcR.Reg)) + return false; + } else { + // Copy between some unknown objects. + return false; + } + EM.insert(std::make_pair(DstR, SrcR)); + return true; + } + case TargetOpcode::REG_SEQUENCE: { + const MachineOperand &Dst = MI->getOperand(0); + RegisterRef DefR = { Dst.getReg(), Dst.getSubReg() }; + SmallVector Inputs; + const TargetInstrInfo &TII = DFG.getTII(); + if (!TII.getRegSequenceInputs(*MI, 0, Inputs)) + return false; + for (auto I : Inputs) { + unsigned S = DFG.getTRI().composeSubRegIndices(DefR.Sub, I.SubIdx); + RegisterRef DR = { DefR.Reg, S }; + RegisterRef SR = { I.Reg, I.SubReg }; + EM.insert(std::make_pair(DR, SR)); + } + return true; + } + } + return false; +} -void CopyPropagation::recordCopy(NodeAddr SA, MachineInstr *MI) { - assert(MI->getOpcode() == TargetOpcode::COPY); - const MachineOperand &Op0 = MI->getOperand(0), &Op1 = MI->getOperand(1); - RegisterRef DstR = { Op0.getReg(), Op0.getSubReg() }; - RegisterRef SrcR = { Op1.getReg(), Op1.getSubReg() }; - auto FS = DefM.find(SrcR); - if (FS == DefM.end() || FS->second.empty()) - return; + +void CopyPropagation::recordCopy(NodeAddr SA, EqualityMap &EM) { + CopyMap.insert(std::make_pair(SA.Id, EM)); Copies.push_back(SA.Id); - RDefMap[SrcR][SA.Id] = FS->second.top()->Id; - // Insert DstR into the map. - RDefMap[DstR]; + + for (auto I : EM) { + auto FS = DefM.find(I.second); + if (FS == DefM.end() || FS->second.empty()) + continue; // Undefined source + RDefMap[I.second][SA.Id] = FS->second.top()->Id; + // Insert DstR into the map. 
+ RDefMap[I.first]; + } } @@ -74,9 +122,9 @@ bool CopyPropagation::scanBlock(MachineBasicBlock *B) { for (NodeAddr IA : BA.Addr->members(DFG)) { if (DFG.IsCode(IA)) { NodeAddr SA = IA; - MachineInstr *MI = SA.Addr->getCode(); - if (MI->isCopy()) - recordCopy(SA, MI); + EqualityMap EM; + if (interpretAsCopy(SA.Addr->getCode(), EM)) + recordCopy(SA, EM); } updateMap(IA); @@ -97,8 +145,14 @@ bool CopyPropagation::run() { if (trace()) { dbgs() << "Copies:\n"; - for (auto I : Copies) - dbgs() << *DFG.addr(I).Addr->getCode(); + for (auto I : Copies) { + dbgs() << "Instr: " << *DFG.addr(I).Addr->getCode(); + dbgs() << " eq: {"; + for (auto J : CopyMap[I]) + dbgs() << ' ' << Print(J.first, DFG) << '=' + << Print(J.second, DFG); + dbgs() << " }\n"; + } dbgs() << "\nRDef map:\n"; for (auto R : RDefMap) { dbgs() << Print(R.first, DFG) << " -> {"; @@ -110,70 +164,87 @@ bool CopyPropagation::run() { } bool Changed = false; - NodeSet Deleted; #ifndef NDEBUG bool HasLimit = CpLimit.getNumOccurrences() > 0; #endif - for (auto I : Copies) { + for (auto C : Copies) { #ifndef NDEBUG if (HasLimit && CpCount >= CpLimit) break; #endif - if (Deleted.count(I)) - continue; - auto SA = DFG.addr(I); - NodeList Ds = SA.Addr->members_if(DFG.IsDef, DFG); - if (Ds.size() != 1) - continue; - NodeAddr DA = Ds[0]; - RegisterRef DR0 = DA.Addr->getRegRef(); - NodeList Us = SA.Addr->members_if(DFG.IsUse, DFG); - if (Us.size() != 1) + auto SA = DFG.addr(C); + auto FS = CopyMap.find(SA.Id); + if (FS == CopyMap.end()) continue; - NodeAddr UA0 = Us[0]; - RegisterRef UR0 = UA0.Addr->getRegRef(); - NodeId RD0 = UA0.Addr->getReachingDef(); - - for (NodeId N = DA.Addr->getReachedUse(), NextN; N; N = NextN) { - auto UA = DFG.addr(N); - NextN = UA.Addr->getSibling(); - uint16_t F = UA.Addr->getFlags(); - if ((F & NodeAttrs::PhiRef) || (F & NodeAttrs::Fixed)) - continue; - if (UA.Addr->getRegRef() != DR0) - continue; - NodeAddr IA = UA.Addr->getOwner(DFG); - assert(DFG.IsCode(IA)); - MachineInstr *MI = 
NodeAddr(IA).Addr->getCode(); - if (RDefMap[UR0][IA.Id] != RD0) + + EqualityMap &EM = FS->second; + for (NodeAddr DA : SA.Addr->members_if(DFG.IsDef, DFG)) { + RegisterRef DR = DA.Addr->getRegRef(); + auto FR = EM.find(DR); + if (FR == EM.end()) continue; - MachineOperand &Op = UA.Addr->getOp(); - if (Op.isTied()) + RegisterRef SR = FR->second; + if (DR == SR) continue; - if (trace()) { - dbgs() << "can replace " << Print(DR0, DFG) - << " with " << Print(UR0, DFG) << " in " - << *NodeAddr(IA).Addr->getCode(); - } - - Op.setReg(UR0.Reg); - Op.setSubReg(UR0.Sub); - Changed = true; -#ifndef NDEBUG - if (HasLimit && CpCount >= CpLimit) - break; - CpCount++; -#endif - if (MI->isCopy()) { - MachineOperand &Op0 = MI->getOperand(0), &Op1 = MI->getOperand(1); - if (Op0.getReg() == Op1.getReg() && Op0.getSubReg() == Op1.getSubReg()) - MI->eraseFromParent(); - Deleted.insert(IA.Id); - } - } - } + auto &RDefSR = RDefMap[SR]; + NodeId RDefSR_SA = RDefSR[SA.Id]; + + for (NodeId N = DA.Addr->getReachedUse(), NextN; N; N = NextN) { + auto UA = DFG.addr(N); + NextN = UA.Addr->getSibling(); + uint16_t F = UA.Addr->getFlags(); + if ((F & NodeAttrs::PhiRef) || (F & NodeAttrs::Fixed)) + continue; + if (UA.Addr->getRegRef() != DR) + continue; + + NodeAddr IA = UA.Addr->getOwner(DFG); + assert(DFG.IsCode(IA)); + if (RDefSR[IA.Id] != RDefSR_SA) + continue; + + MachineOperand &Op = UA.Addr->getOp(); + if (Op.isTied()) + continue; + if (trace()) { + dbgs() << "Can replace " << Print(DR, DFG) + << " with " << Print(SR, DFG) << " in " + << *NodeAddr(IA).Addr->getCode(); + } + + Op.setReg(SR.Reg); + Op.setSubReg(SR.Sub); + DFG.unlinkUse(UA, false); + if (RDefSR_SA != 0) { + UA.Addr->linkToDef(UA.Id, DFG.addr(RDefSR_SA)); + } else { + UA.Addr->setReachingDef(0); + UA.Addr->setSibling(0); + } + + Changed = true; + #ifndef NDEBUG + if (HasLimit && CpCount >= CpLimit) + break; + CpCount++; + #endif + + auto FC = CopyMap.find(IA.Id); + if (FC != CopyMap.end()) { + // Update the EM map in the copy's 
entry. + auto &M = FC->second; + for (auto &J : M) { + if (J.second != DR) + continue; + J.second = SR; + break; + } + } + } // for (N in reached-uses) + } // for (DA in defs) + } // for (C in Copies) return Changed; } diff --git a/lib/Target/Hexagon/RDFCopy.h b/lib/Target/Hexagon/RDFCopy.h index 02531b94c9b0..e8a576cf57a3 100644 --- a/lib/Target/Hexagon/RDFCopy.h +++ b/lib/Target/Hexagon/RDFCopy.h @@ -18,17 +18,20 @@ namespace llvm { class MachineBasicBlock; class MachineDominatorTree; class MachineInstr; -} namespace rdf { struct CopyPropagation { CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg), Trace(false) {} + virtual ~CopyPropagation() {} bool run(); void trace(bool On) { Trace = On; } bool trace() const { return Trace; } + typedef std::map EqualityMap; + virtual bool interpretAsCopy(const MachineInstr *MI, EqualityMap &EM); + private: const MachineDominatorTree &MDT; DataFlowGraph &DFG; @@ -37,12 +40,15 @@ namespace rdf { // map: register -> (map: stmt -> reaching def) std::map> RDefMap; + // map: statement -> (map: dst reg -> src reg) + std::map CopyMap; std::vector Copies; - void recordCopy(NodeAddr SA, MachineInstr *MI); + void recordCopy(NodeAddr SA, EqualityMap &EM); void updateMap(NodeAddr IA); bool scanBlock(MachineBasicBlock *B); }; -} +} // namespace rdf +} // namespace llvm #endif diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp index 95668577bd50..63177d51cada 100644 --- a/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/lib/Target/Hexagon/RDFDeadCode.cpp @@ -18,9 +18,38 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include + using namespace llvm; using namespace rdf; +// This drastically improves execution time in "collect" over using +// SetVector as a work queue, and popping the first element from it. 
+template struct DeadCodeElimination::SetQueue { + SetQueue() : Set(), Queue() {} + + bool empty() const { + return Queue.empty(); + } + T pop_front() { + T V = Queue.front(); + Queue.pop(); + Set.erase(V); + return V; + } + void push_back(T V) { + if (Set.count(V)) + return; + Queue.push(V); + Set.insert(V); + } + +private: + DenseSet Set; + std::queue Queue; +}; + + // Check if the given instruction has observable side-effects, i.e. if // it should be considered "live". It is safe for this function to be // overly conservative (i.e. return "true" for all instructions), but it @@ -40,33 +69,33 @@ bool DeadCodeElimination::isLiveInstr(const MachineInstr *MI) const { } void DeadCodeElimination::scanInstr(NodeAddr IA, - SetVector &WorkQ) { + SetQueue &WorkQ) { if (!DFG.IsCode(IA)) return; if (!isLiveInstr(NodeAddr(IA).Addr->getCode())) return; for (NodeAddr RA : IA.Addr->members(DFG)) { if (!LiveNodes.count(RA.Id)) - WorkQ.insert(RA.Id); + WorkQ.push_back(RA.Id); } } void DeadCodeElimination::processDef(NodeAddr DA, - SetVector &WorkQ) { + SetQueue &WorkQ) { NodeAddr IA = DA.Addr->getOwner(DFG); for (NodeAddr UA : IA.Addr->members_if(DFG.IsUse, DFG)) { if (!LiveNodes.count(UA.Id)) - WorkQ.insert(UA.Id); + WorkQ.push_back(UA.Id); } for (NodeAddr TA : DFG.getRelatedRefs(IA, DA)) LiveNodes.insert(TA.Id); } void DeadCodeElimination::processUse(NodeAddr UA, - SetVector &WorkQ) { + SetQueue &WorkQ) { for (NodeAddr DA : LV.getAllReachingDefs(UA)) { if (!LiveNodes.count(DA.Id)) - WorkQ.insert(DA.Id); + WorkQ.push_back(DA.Id); } } @@ -84,14 +113,13 @@ bool DeadCodeElimination::collect() { // instruction are considered live. For each live use, all its reaching // defs are considered live. 
LiveNodes.clear(); - SetVector WorkQ; + SetQueue WorkQ; for (NodeAddr BA : DFG.getFunc().Addr->members(DFG)) for (NodeAddr IA : BA.Addr->members(DFG)) scanInstr(IA, WorkQ); while (!WorkQ.empty()) { - NodeId N = *WorkQ.begin(); - WorkQ.remove(N); + NodeId N = WorkQ.pop_front(); LiveNodes.insert(N); auto RA = DFG.addr(N); if (DFG.IsDef(RA)) @@ -183,9 +211,9 @@ bool DeadCodeElimination::erase(const SetVector &Nodes) { if (trace()) dbgs() << " " << PrintNode(RA, DFG) << '\n'; if (DFG.IsUse(RA)) - DFG.unlinkUse(RA); + DFG.unlinkUse(RA, true); else if (DFG.IsDef(RA)) - DFG.unlinkDef(RA); + DFG.unlinkDef(RA, true); } // Now, remove all dead instruction nodes. diff --git a/lib/Target/Hexagon/RDFDeadCode.h b/lib/Target/Hexagon/RDFDeadCode.h index f4373fb5007d..8977e730b855 100644 --- a/lib/Target/Hexagon/RDFDeadCode.h +++ b/lib/Target/Hexagon/RDFDeadCode.h @@ -30,7 +30,6 @@ namespace llvm { class MachineRegisterInfo; -} namespace rdf { struct DeadCodeElimination { @@ -55,11 +54,14 @@ namespace rdf { MachineRegisterInfo &MRI; Liveness LV; + template struct SetQueue; + bool isLiveInstr(const MachineInstr *MI) const; - void scanInstr(NodeAddr IA, SetVector &WorkQ); - void processDef(NodeAddr DA, SetVector &WorkQ); - void processUse(NodeAddr UA, SetVector &WorkQ); + void scanInstr(NodeAddr IA, SetQueue &WorkQ); + void processDef(NodeAddr DA, SetQueue &WorkQ); + void processUse(NodeAddr UA, SetQueue &WorkQ); }; -} +} // namespace rdf +} // namespace llvm #endif diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp index 9b47422153bb..273d6b7cb0c8 100644 --- a/lib/Target/Hexagon/RDFGraph.cpp +++ b/lib/Target/Hexagon/RDFGraph.cpp @@ -25,6 +25,7 @@ using namespace rdf; // Printing functions. Have them here first, so that the rest of the code // can use them. +namespace llvm { namespace rdf { template<> @@ -298,6 +299,7 @@ raw_ostream &operator<< (raw_ostream &OS, } } // namespace rdf +} // namespace llvm // Node allocation functions. 
// @@ -315,7 +317,7 @@ void NodeAllocator::startNewBlock() { // Check if the block index is still within the allowed range, i.e. less // than 2^N, where N is the number of bits in NodeId for the block index. // BitsPerIndex is the number of bits per node index. - assert((Blocks.size() < (1U << (8*sizeof(NodeId)-BitsPerIndex))) && + assert((Blocks.size() < ((size_t)1 << (8*sizeof(NodeId)-BitsPerIndex))) && "Out of bits for block index"); ActiveEnd = P; } @@ -674,7 +676,7 @@ bool RegisterAliasInfo::alias(RegisterRef RA, RegisterRef RB) const { // unchanged across this def. bool TargetOperandInfo::isPreserving(const MachineInstr &In, unsigned OpNum) const { - return TII.isPredicated(&In); + return TII.isPredicated(In); } // Check if the definition of RR produces an unspecified value. @@ -686,11 +688,17 @@ bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum) return false; } -// Check if the given instruction specifically requires +// Check if the given instruction specifically requires bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) const { - if (In.isCall() || In.isReturn()) + if (In.isCall() || In.isReturn() || In.isInlineAsm()) return true; + // Check for a tail call. + if (In.isBranch()) + for (auto &O : In.operands()) + if (O.isGlobal() || O.isSymbol()) + return true; + const MCInstrDesc &D = In.getDesc(); if (!D.getImplicitDefs() && !D.getImplicitUses()) return false; @@ -919,7 +927,7 @@ NodeAddr DataFlowGraph::newFunc(MachineFunction *MF) { } // Build the data flow graph. -void DataFlowGraph::build() { +void DataFlowGraph::build(unsigned Options) { reset(); Func = newFunc(&MF); @@ -964,7 +972,8 @@ void DataFlowGraph::build() { linkBlockRefs(DM, EA); // Finally, remove all unused phi nodes. - removeUnusedPhis(); + if (!(Options & BuildOptions::KeepDeadPhis)) + removeUnusedPhis(); } // For each stack in the map DefM, push the delimiter for block B on it. 
@@ -1167,6 +1176,17 @@ NodeAddr DataFlowGraph::getNextShadow(NodeAddr IA, void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { auto SA = newStmt(BA, &In); + auto isCall = [] (const MachineInstr &In) -> bool { + if (In.isCall()) + return true; + // Is tail call? + if (In.isBranch()) + for (auto &Op : In.operands()) + if (Op.isGlobal() || Op.isSymbol()) + return true; + return false; + }; + // Collect a set of registers that this instruction implicitly uses // or defines. Implicit operands from an instruction will be ignored // unless they are listed here. @@ -1178,8 +1198,8 @@ void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { while (uint16_t R = *ImpU++) ImpUses.insert({R, 0}); - bool IsCall = In.isCall(), IsReturn = In.isReturn(); - bool IsPredicated = TII.isPredicated(&In); + bool NeedsImplicit = isCall(In) || In.isInlineAsm() || In.isReturn(); + bool IsPredicated = TII.isPredicated(In); unsigned NumOps = In.getNumOperands(); // Avoid duplicate implicit defs. This will not detect cases of implicit @@ -1212,7 +1232,7 @@ void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { if (!Op.isReg() || !Op.isDef() || !Op.isImplicit()) continue; RegisterRef RR = { Op.getReg(), Op.getSubReg() }; - if (!IsCall && !ImpDefs.count(RR)) + if (!NeedsImplicit && !ImpDefs.count(RR)) continue; if (DoneDefs.count(RR)) continue; @@ -1237,7 +1257,7 @@ void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { // instructions regardless of whether or not they appear in the instruction // descriptor's list. 
bool Implicit = Op.isImplicit(); - bool TakeImplicit = IsReturn || IsCall || IsPredicated; + bool TakeImplicit = NeedsImplicit || IsPredicated; if (Implicit && !TakeImplicit && !ImpUses.count(RR)) continue; uint16_t Flags = NodeAttrs::None; @@ -1456,9 +1476,9 @@ void DataFlowGraph::removeUnusedPhis() { PhiQ.insert(OA.Id); } if (RA.Addr->isDef()) - unlinkDef(RA); + unlinkDef(RA, true); else - unlinkUse(RA); + unlinkUse(RA, true); } NodeAddr BA = PA.Addr->getOwner(*this); BA.Addr->removeMember(PA, *this); @@ -1546,6 +1566,7 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr BA) { // Push block delimiters. markBlock(BA.Id, DefM); + assert(BA.Addr && "block node address is needed to create a data-flow link"); // For each non-phi instruction in the block, link all the defs and uses // to their reaching defs. For any member of the block (including phis), // push the defs on the corresponding stacks. @@ -1593,13 +1614,10 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr BA) { } // Remove the use node UA from any data-flow and structural links. -void DataFlowGraph::unlinkUse(NodeAddr UA) { +void DataFlowGraph::unlinkUseDF(NodeAddr UA) { NodeId RD = UA.Addr->getReachingDef(); NodeId Sib = UA.Addr->getSibling(); - NodeAddr IA = UA.Addr->getOwner(*this); - IA.Addr->removeMember(UA, *this); - if (RD == 0) { assert(Sib == 0); return; @@ -1623,7 +1641,7 @@ void DataFlowGraph::unlinkUse(NodeAddr UA) { } // Remove the def node DA from any data-flow and structural links. 
-void DataFlowGraph::unlinkDef(NodeAddr DA) { +void DataFlowGraph::unlinkDefDF(NodeAddr DA) { // // RD // | reached @@ -1710,7 +1728,4 @@ void DataFlowGraph::unlinkDef(NodeAddr DA) { Last.Addr->setSibling(RDA.Addr->getReachedUse()); RDA.Addr->setReachedUse(ReachedUses.front().Id); } - - NodeAddr IA = DA.Addr->getOwner(*this); - IA.Addr->removeMember(DA, *this); } diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h index 7da7bb5973cf..49b053741263 100644 --- a/lib/Target/Hexagon/RDFGraph.h +++ b/lib/Target/Hexagon/RDFGraph.h @@ -202,7 +202,6 @@ #ifndef RDF_GRAPH_H #define RDF_GRAPH_H -#include "llvm/ADT/BitVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -213,8 +212,6 @@ #include #include -using namespace llvm; - namespace llvm { class MachineBasicBlock; class MachineFunction; @@ -224,7 +221,6 @@ namespace llvm { class MachineDominatorTree; class TargetInstrInfo; class TargetRegisterInfo; -} namespace rdf { typedef uint32_t NodeId; @@ -288,6 +284,13 @@ namespace rdf { } }; + struct BuildOptions { + enum : unsigned { + None = 0x00, + KeepDeadPhis = 0x01, // Do not remove dead phis during build. 
+ }; + }; + template struct NodeAddr { NodeAddr() : Addr(nullptr), Id(0) {} NodeAddr(T A, NodeId I) : Addr(A), Id(I) {} @@ -678,7 +681,7 @@ namespace rdf { typedef std::map DefStackMap; - void build(); + void build(unsigned Options = BuildOptions::None); void pushDefs(NodeAddr IA, DefStackMap &DM); void markBlock(NodeId B, DefStackMap &DefM); void releaseBlock(NodeId B, DefStackMap &DefM); @@ -697,8 +700,16 @@ namespace rdf { NodeList getRelatedRefs(NodeAddr IA, NodeAddr RA) const; - void unlinkUse(NodeAddr UA); - void unlinkDef(NodeAddr DA); + void unlinkUse(NodeAddr UA, bool RemoveFromOwner) { + unlinkUseDF(UA); + if (RemoveFromOwner) + removeFromOwner(UA); + } + void unlinkDef(NodeAddr DA, bool RemoveFromOwner) { + unlinkDefDF(DA); + if (RemoveFromOwner) + removeFromOwner(DA); + } // Some useful filters. template @@ -765,6 +776,13 @@ namespace rdf { void linkStmtRefs(DefStackMap &DefM, NodeAddr SA); void linkBlockRefs(DefStackMap &DefM, NodeAddr BA); + void unlinkUseDF(NodeAddr UA); + void unlinkDefDF(NodeAddr DA); + void removeFromOwner(NodeAddr RA) { + NodeAddr IA = RA.Addr->getOwner(*this); + IA.Addr->removeMember(RA, *this); + } + TimerGroup TimeG; NodeAddr Func; NodeAllocator Memory; @@ -837,5 +855,6 @@ namespace rdf { : Print>(x, g) {} }; } // namespace rdf +} // namespace llvm #endif // RDF_GRAPH_H diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp index 1d9bd372ff4e..641f01423176 100644 --- a/lib/Target/Hexagon/RDFLiveness.cpp +++ b/lib/Target/Hexagon/RDFLiveness.cpp @@ -36,6 +36,7 @@ using namespace llvm; using namespace rdf; +namespace llvm { namespace rdf { template<> raw_ostream &operator<< (raw_ostream &OS, const Print &P) { @@ -52,7 +53,8 @@ namespace rdf { OS << " }"; return OS; } -} +} // namespace rdf +} // namespace llvm // The order in the returned sequence is the order of reaching defs in the // upward traversal: the first def is the closest to the given reference RefA, @@ -235,7 +237,93 @@ NodeList 
Liveness::getAllReachingDefs(NodeAddr RefA) { } +NodeSet Liveness::getAllReachingDefsRec(RegisterRef RefRR, + NodeAddr RefA, NodeSet &Visited, const NodeSet &Defs) { + // Collect all defined registers. Do not consider phis to be defining + // anything, only collect "real" definitions. + RegisterSet DefRRs; + for (const auto D : Defs) { + const auto DA = DFG.addr(D); + if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef)) + DefRRs.insert(DA.Addr->getRegRef()); + } + + auto RDs = getAllReachingDefs(RefRR, RefA, true, DefRRs); + if (RDs.empty()) + return Defs; + + // Make a copy of the preexisting definitions and add the newly found ones. + NodeSet TmpDefs = Defs; + for (auto R : RDs) + TmpDefs.insert(R.Id); + + NodeSet Result = Defs; + + for (NodeAddr DA : RDs) { + Result.insert(DA.Id); + if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef)) + continue; + NodeAddr PA = DA.Addr->getOwner(DFG); + if (Visited.count(PA.Id)) + continue; + Visited.insert(PA.Id); + // Go over all phi uses and get the reaching defs for each use. + for (auto U : PA.Addr->members_if(DFG.IsRef, DFG)) { + const auto &T = getAllReachingDefsRec(RefRR, U, Visited, TmpDefs); + Result.insert(T.begin(), T.end()); + } + } + + return Result; +} + + +NodeSet Liveness::getAllReachedUses(RegisterRef RefRR, + NodeAddr DefA, const RegisterSet &DefRRs) { + NodeSet Uses; + + // If the original register is already covered by all the intervening + // defs, no more uses can be reached. + if (RAI.covers(DefRRs, RefRR)) + return Uses; + + // Add all directly reached uses. + NodeId U = DefA.Addr->getReachedUse(); + while (U != 0) { + auto UA = DFG.addr(U); + auto UR = UA.Addr->getRegRef(); + if (RAI.alias(RefRR, UR) && !RAI.covers(DefRRs, UR)) + Uses.insert(U); + U = UA.Addr->getSibling(); + } + + // Traverse all reached defs. 
+ for (NodeId D = DefA.Addr->getReachedDef(), NextD; D != 0; D = NextD) { + auto DA = DFG.addr(D); + NextD = DA.Addr->getSibling(); + auto DR = DA.Addr->getRegRef(); + // If this def is already covered, it cannot reach anything new. + // Similarly, skip it if it is not aliased to the interesting register. + if (RAI.covers(DefRRs, DR) || !RAI.alias(RefRR, DR)) + continue; + NodeSet T; + if (DA.Addr->getFlags() & NodeAttrs::Preserving) { + // If it is a preserving def, do not update the set of intervening defs. + T = getAllReachedUses(RefRR, DA, DefRRs); + } else { + RegisterSet NewDefRRs = DefRRs; + NewDefRRs.insert(DR); + T = getAllReachedUses(RefRR, DA, NewDefRRs); + } + Uses.insert(T.begin(), T.end()); + } + return Uses; +} + + void Liveness::computePhiInfo() { + RealUseMap.clear(); + NodeList Phis; NodeAddr FA = DFG.getFunc(); auto Blocks = FA.Addr->members(DFG); @@ -601,7 +689,11 @@ void Liveness::resetKills(MachineBasicBlock *B) { MI->clearKillInfo(); for (auto &Op : MI->operands()) { - if (!Op.isReg() || !Op.isDef()) + // An implicit def of a super-register may not necessarily start a + // live range of it, since an implicit use could be used to keep parts + // of it live. Instead of analyzing the implicit operands, ignore + // implicit defs. 
+ if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) continue; unsigned R = Op.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(R)) @@ -616,8 +708,8 @@ void Liveness::resetKills(MachineBasicBlock *B) { if (!TargetRegisterInfo::isPhysicalRegister(R)) continue; bool IsLive = false; - for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) { - if (!Live[*SR]) + for (MCRegAliasIterator AR(R, &TRI, true); AR.isValid(); ++AR) { + if (!Live[*AR]) continue; IsLive = true; break; diff --git a/lib/Target/Hexagon/RDFLiveness.h b/lib/Target/Hexagon/RDFLiveness.h index 4c1e8f3ee838..2b49c7488ce3 100644 --- a/lib/Target/Hexagon/RDFLiveness.h +++ b/lib/Target/Hexagon/RDFLiveness.h @@ -26,7 +26,6 @@ namespace llvm { class TargetRegisterInfo; class MachineDominatorTree; class MachineDominanceFrontier; -} namespace rdf { struct Liveness { @@ -41,6 +40,10 @@ namespace rdf { NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr RefA, bool FullChain = false, const RegisterSet &DefRRs = RegisterSet()); NodeList getAllReachingDefs(NodeAddr RefA); + NodeSet getAllReachingDefsRec(RegisterRef RefRR, NodeAddr RefA, + NodeSet &Visited, const NodeSet &Defs); + NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr DefA, + const RegisterSet &DefRRs = RegisterSet()); LiveMapType &getLiveMap() { return LiveMap; } const LiveMapType &getLiveMap() const { return LiveMap; } @@ -101,6 +104,7 @@ namespace rdf { void traverse(MachineBasicBlock *B, RefMap &LiveIn); void emptify(RefMap &M); }; -} +} // namespace rdf +} // namespace llvm #endif // RDF_LIVENESS_H diff --git a/lib/Target/Hexagon/TargetInfo/Makefile b/lib/Target/Hexagon/TargetInfo/Makefile deleted file mode 100644 index 494cca112249..000000000000 --- a/lib/Target/Hexagon/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/Hexagon/TargetInfo/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. 
See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMHexagonInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common -- cgit v1.2.3