path: root/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
author    Dimitry Andric <dim@FreeBSD.org>    2020-07-31 21:22:58 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2020-07-31 21:22:58 +0000
commit    5ffd83dbcc34f10e07f6d3e968ae6365869615f4 (patch)
tree      0e9f5cf729dde39f949698fddef45a34e2bc7f44 /contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
parent    1799696096df87b52968b8996d00c91e0a5de8d9 (diff)
parent    cfca06d7963fa0909f90483b42a6d7d194d01e08 (diff)
Merge llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp master 2e10b7a39b9, the last commit before the llvmorg-12-init tag, from which release/11.x was branched.

Note that for now, I rolled back all our local changes to make merging easier, and I will reapply the still-relevant ones after updating to 11.0.0-rc1.
Notes: svn path=/projects/clang1100-import/; revision=363742
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 662
1 file changed, 621 insertions(+), 41 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 48f781510254..4cc2b6bf7e7e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
@@ -495,6 +496,31 @@ bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
}
+std::string ARMBaseInstrInfo::createMIROperandComment(
+ const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
+ const TargetRegisterInfo *TRI) const {
+
+ // First, let's see if there is a generic comment for this operand
+ std::string GenericComment =
+ TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
+ if (!GenericComment.empty())
+ return GenericComment;
+
+ // If not, check if we have an immediate operand.
+ if (Op.getType() != MachineOperand::MO_Immediate)
+ return std::string();
+
+ // And print its corresponding condition code if the immediate is a
+ // predicate.
+ int FirstPredOp = MI.findFirstPredOperandIdx();
+ if (FirstPredOp != (int) OpIdx)
+ return std::string();
+
+ std::string CC = "CC::";
+ CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
+ return CC;
+}
+
bool ARMBaseInstrInfo::PredicateInstruction(
MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
unsigned Opc = MI.getOpcode();
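
With the new createMIROperandComment hook above, the MIR printer annotates ARM predicate immediates with their condition code. A hypothetical dump line (illustrative only; the opcode and registers here are assumed) would look like:

    $r0 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg

where 14 is the raw value of ARMCC::AL and the "CC::al" comment is the string returned by the hook.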
@@ -811,7 +837,7 @@ void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
}
void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
- unsigned DestReg) {
+ Register DestReg) {
addUnpredicatedMveVpredNOp(MIB);
MIB.addReg(DestReg, RegState::Undef);
}
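
A recurring change in this diff is swapping unsigned for llvm::Register in signatures, as in addUnpredicatedMveVpredROp above. A minimal sketch of why this is a drop-in replacement (based on llvm/CodeGen/Register.h; the variable names are illustrative):

    #include "llvm/CodeGen/Register.h"
    using llvm::Register;

    Register R;                 // default-constructs to the invalid register (0)
    R = ARM::LR;                // implicit conversion from unsigned keeps old
                                //   call sites compiling unchanged
    bool Virt = R.isVirtual();  // replaces Register::isVirtualRegister(Reg)
    unsigned Raw = R;           // converts back to unsigned where still needed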
@@ -1009,6 +1035,36 @@ ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
}
+Optional<ParamLoadedValue>
+ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
+ Register Reg) const {
+ if (auto DstSrcPair = isCopyInstrImpl(MI)) {
+ Register DstReg = DstSrcPair->Destination->getReg();
+
+ // TODO: We don't handle cases where the forwarding reg is narrower/wider
+ // than the copy registers. Consider for example:
+ //
+ // s16 = VMOVS s0
+ // s17 = VMOVS s1
+ // call @callee(d0)
+ //
+ // We'd like to describe the call site value of d0 as d8, but this requires
+ // gathering and merging the descriptions for the two VMOVS instructions.
+ //
+ // We also don't handle the reverse situation, where the forwarding reg is
+ // narrower than the copy destination:
+ //
+ // d8 = VMOVD d0
+ // call @callee(s1)
+ //
+ // We need to produce a fragment description (the call site value of s1 is
+ // /not/ just d8).
+ if (DstReg != Reg)
+ return None;
+ }
+ return TargetInstrInfo::describeLoadedValue(MI, Reg);
+}
+
const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
unsigned SubIdx, unsigned State,
@@ -1023,16 +1079,16 @@ ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
+ Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FI);
+ Align Alignment = MFI.getObjectAlign(FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
- MFI.getObjectSize(FI), Align);
+ MFI.getObjectSize(FI), Alignment);
switch (TRI->getSpillSize(*RC)) {
case 2:
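
The Alignment changes in this and the following hunks replace raw unsigned byte counts with the llvm::Align type from llvm/Support/Alignment.h. A minimal sketch of the pattern (standalone; assumes the API of this LLVM revision):

    #include "llvm/Support/Alignment.h"
    using llvm::Align;

    Align A(4);                // wraps a power-of-two byte alignment;
                               //   asserts if the value is not a power of two
    uint64_t Raw = A.value();  // back to a plain integer where one is required
    bool Realign = (A >= 16);  // integer comparisons, as in the spill code above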
@@ -1102,7 +1158,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 16:
if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
// Use aligned spills if the stack can be realigned.
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
.addFrameIndex(FI)
.addImm(16)
@@ -1130,7 +1186,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
Subtarget.hasNEON()) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
.addFrameIndex(FI)
@@ -1153,7 +1209,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
Subtarget.hasNEON()) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
@@ -1264,17 +1320,17 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
+ Register DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FI);
+ const Align Alignment = MFI.getObjectAlign(FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI), Align);
+ MFI.getObjectSize(FI), Alignment);
switch (TRI->getSpillSize(*RC)) {
case 2:
@@ -1343,7 +1399,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 16:
if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
.addFrameIndex(FI)
.addImm(16)
@@ -1367,7 +1423,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
Subtarget.hasNEON()) {
BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
.addFrameIndex(FI)
@@ -1390,7 +1446,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
Subtarget.hasNEON()) {
BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI)
@@ -1682,13 +1738,13 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
else
llvm_unreachable("Unexpected ARM constantpool value type!!");
- CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
return PCLabelId;
}
void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SubIdx,
+ Register DestReg, unsigned SubIdx,
const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const {
unsigned Opcode = Orig.getOpcode();
@@ -1959,6 +2015,10 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
if (MI.isTerminator() || MI.isPosition())
return true;
+ // INLINEASM_BR can jump to another block
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+ return true;
+
// Treat the start of the IT block as a scheduling boundary, but schedule
// t2IT along with all instructions following it.
// FIXME: This is a big hammer. But the alternative is to add all potential
@@ -2120,7 +2180,7 @@ ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
- unsigned &PredReg) {
+ Register &PredReg) {
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx == -1) {
PredReg = 0;
@@ -2150,7 +2210,7 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
case ARM::MOVCCr:
case ARM::t2MOVCCr: {
// MOVCC can be commuted by inverting the condition.
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
@@ -2171,9 +2231,9 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
MachineInstr *
-ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
+ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) const {
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
return nullptr;
@@ -2353,9 +2413,9 @@ unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const DebugLoc &dl, unsigned DestReg,
- unsigned BaseReg, int NumBytes,
- ARMCC::CondCodes Pred, unsigned PredReg,
+ const DebugLoc &dl, Register DestReg,
+ Register BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, Register PredReg,
const ARMBaseInstrInfo &TII,
unsigned MIFlags) {
if (NumBytes == 0 && DestReg != BaseReg) {
@@ -2515,7 +2575,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
}
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
+ Register FrameReg, int &Offset,
const ARMBaseInstrInfo &TII) {
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MI.getDesc();
@@ -2671,8 +2731,8 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
-bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
+bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &CmpMask,
int &CmpValue) const {
switch (MI.getOpcode()) {
default: break;
@@ -2708,7 +2768,7 @@ bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
/// operates on the given source register and applies the same mask
/// as a 'tst' instruction. Provide a limited look-through for copies.
/// When successful, MI will hold the found instruction.
-static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
+static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
int CmpMask, bool CommonUse) {
switch (MI->getOpcode()) {
case ARM::ANDri:
@@ -2743,7 +2803,7 @@ inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
- unsigned SrcReg, unsigned SrcReg2,
+ Register SrcReg, Register SrcReg2,
int ImmValue, const MachineInstr *OI,
bool &IsThumb1) {
if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
@@ -2879,7 +2939,7 @@ static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
/// condition code of instructions which use the flags.
bool ARMBaseInstrInfo::optimizeCompareInstr(
- MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
+ MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
int CmpValue, const MachineRegisterInfo *MRI) const {
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
@@ -3166,7 +3226,7 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
return true;
MachineBasicBlock::const_iterator Next = &MI;
++Next;
- unsigned SrcReg, SrcReg2;
+ Register SrcReg, SrcReg2;
int CmpMask, CmpValue;
bool IsThumb1;
if (Next != MI.getParent()->end() &&
@@ -3177,7 +3237,7 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
}
bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
- unsigned Reg,
+ Register Reg,
MachineRegisterInfo *MRI) const {
// Fold large immediates into add, sub, or, xor.
unsigned DefOpc = DefMI.getOpcode();
@@ -3729,7 +3789,7 @@ unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
- (*MI.memoperands_begin())->getAlignment() < 8)
+ (*MI.memoperands_begin())->getAlign() < Align(8))
++UOps;
return UOps;
}
@@ -4316,10 +4376,10 @@ int ARMBaseInstrInfo::getOperandLatencyImpl(
return -1;
unsigned DefAlign = DefMI.hasOneMemOperand()
- ? (*DefMI.memoperands_begin())->getAlignment()
+ ? (*DefMI.memoperands_begin())->getAlign().value()
: 0;
unsigned UseAlign = UseMI.hasOneMemOperand()
- ? (*UseMI.memoperands_begin())->getAlignment()
+ ? (*UseMI.memoperands_begin())->getAlign().value()
: 0;
// Get the itinerary's latency if possible, and handle variable_ops.
@@ -4366,10 +4426,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
auto *DefMN = cast<MachineSDNode>(DefNode);
unsigned DefAlign = !DefMN->memoperands_empty()
- ? (*DefMN->memoperands_begin())->getAlignment() : 0;
+ ? (*DefMN->memoperands_begin())->getAlign().value()
+ : 0;
auto *UseMN = cast<MachineSDNode>(UseNode);
unsigned UseAlign = !UseMN->memoperands_empty()
- ? (*UseMN->memoperands_begin())->getAlignment() : 0;
+ ? (*UseMN->memoperands_begin())->getAlign().value()
+ : 0;
int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
UseMCID, UseIdx, UseAlign);
@@ -4660,7 +4722,7 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
// Adjust for dynamic def-side opcode variants not captured by the itinerary.
unsigned DefAlign =
- MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
+ MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
if (Adj >= 0 || (int)Latency > -Adj) {
return Latency + Adj;
@@ -4782,7 +4844,7 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
- MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
}
@@ -5353,7 +5415,8 @@ Optional<RegImmPair> ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI,
// TODO: Handle cases where Reg is a super- or sub-register of the
// destination register.
- if (Reg != MI.getOperand(0).getReg())
+ const MachineOperand &Op0 = MI.getOperand(0);
+ if (!Op0.isReg() || Reg != Op0.getReg())
return None;
// We describe SUBri or ADDri instructions.
@@ -5365,8 +5428,7 @@ Optional<RegImmPair> ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI,
// TODO: Third operand can be global address (usually some string). Since
// strings can be relocated we cannot calculate their offsets for
// now.
- if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
- !MI.getOperand(2).isImm())
+ if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
return None;
Offset = MI.getOperand(2).getImm() * Sign;
@@ -5402,7 +5464,7 @@ MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
return nullptr;
Register Reg = CmpMI->getOperand(0).getReg();
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
return nullptr;
@@ -5460,3 +5522,521 @@ bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
}
+
+/// Constants defining how certain sequences should be outlined.
+/// This encompasses how an outlined function should be called, and what kind of
+/// frame should be emitted for that outlined function.
+///
+/// \p MachineOutlinerTailCall implies that the function is being created from
+/// a sequence of instructions ending in a return.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> B OUTLINED_FUNCTION I1
+/// BX LR I2
+/// BX LR
+///
+/// +-------------------------+--------+-----+
+/// | | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes | 4 | 4 |
+/// | Frame overhead in Bytes | 0 | 0 |
+/// | Stack fixup required | No | No |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerThunk implies that the function is being created from
+/// a sequence of instructions ending in a call. The outlined function is
+/// called with a BL instruction, and the outlined function tail-calls the
+/// original call destination.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// BL f I2
+/// B f
+///
+/// +-------------------------+--------+-----+
+/// | | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes | 4 | 4 |
+/// | Frame overhead in Bytes | 0 | 0 |
+/// | Stack fixup required | No | No |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerNoLRSave implies that the function should be called using
+/// a BL instruction, but doesn't require LR to be saved and restored. This
+/// happens when LR is known to be dead.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 I2
+/// I3
+/// BX LR
+///
+/// +-------------------------+--------+-----+
+/// | | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes | 4 | 4 |
+/// | Frame overhead in Bytes | 4 | 4 |
+/// | Stack fixup required | No | No |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerRegSave implies that the function should be called with a
+/// save and restore of LR to an available register. This allows us to avoid
+/// stack fixups. Note that this outlining variant is compatible with the
+/// NoLRSave case.
+///
+/// That is,
+///
+/// I1 Save LR OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 Restore LR I2
+/// I3
+/// BX LR
+///
+/// +-------------------------+--------+-----+
+/// | | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes | 8 | 12 |
+/// | Frame overhead in Bytes | 2 | 4 |
+/// | Stack fixup required | No | No |
+/// +-------------------------+--------+-----+
+
+enum MachineOutlinerClass {
+ MachineOutlinerTailCall,
+ MachineOutlinerThunk,
+ MachineOutlinerNoLRSave,
+ MachineOutlinerRegSave
+};
+
+enum MachineOutlinerMBBFlags {
+ LRUnavailableSomewhere = 0x2,
+ HasCalls = 0x4,
+ UnsafeRegsDead = 0x8
+};
+
+struct OutlinerCosts {
+ const int CallTailCall;
+ const int FrameTailCall;
+ const int CallThunk;
+ const int FrameThunk;
+ const int CallNoLRSave;
+ const int FrameNoLRSave;
+ const int CallRegSave;
+ const int FrameRegSave;
+
+ OutlinerCosts(const ARMSubtarget &target)
+ : CallTailCall(target.isThumb() ? 4 : 4),
+ FrameTailCall(target.isThumb() ? 0 : 0),
+ CallThunk(target.isThumb() ? 4 : 4),
+ FrameThunk(target.isThumb() ? 0 : 0),
+ CallNoLRSave(target.isThumb() ? 4 : 4),
+ FrameNoLRSave(target.isThumb() ? 4 : 4),
+ CallRegSave(target.isThumb() ? 8 : 12),
+ FrameRegSave(target.isThumb() ? 2 : 4) {}
+};
+
+unsigned
+ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
+ assert(C.LRUWasSet && "LRU wasn't set?");
+ MachineFunction *MF = C.getMF();
+ const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
+
+ BitVector regsReserved = ARI->getReservedRegs(*MF);
+ // Check if there is an available register across the sequence that we can
+ // use.
+ for (unsigned Reg : ARM::rGPRRegClass) {
+ if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
+ Reg != ARM::LR && // LR is not reserved, but don't use it.
+ Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
+ C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+ return Reg;
+ }
+
+ // No suitable register. Return 0.
+ return 0u;
+}
+
+outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+ outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
+ unsigned SequenceSize =
+ std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
+ [this](unsigned Sum, const MachineInstr &MI) {
+ return Sum + getInstSizeInBytes(MI);
+ });
+
+ // Properties about candidate MBBs that hold for all of them.
+ unsigned FlagsSetInAll = 0xF;
+
+ // Compute liveness information for each candidate, and set FlagsSetInAll.
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+ std::for_each(
+ RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
+ [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; });
+
+ // According to the ARM Procedure Call Standard, the following are
+ // undefined on entry/exit from a function call:
+ //
+ // * Register R12(IP),
+ // * Condition codes (and thus the CPSR register)
+ //
+ // Since we control the instructions which are part of the outlined regions
+ // we don't need to be fully compliant with the AAPCS, but we have to
+ // guarantee that if a veneer is inserted at link time the code is still
+ // correct. Because of this, we can't outline any sequence of instructions
+ // where one of these registers is live into/across it. Thus, we need to
+ // delete those candidates.
+ auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
+ // If the unsafe registers in this block are all dead, then we don't need
+ // to compute liveness here.
+ if (C.Flags & UnsafeRegsDead)
+ return false;
+ C.initLRU(TRI);
+ LiveRegUnits LRU = C.LRU;
+ return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR));
+ };
+
+ // Are there any candidates where those registers are live?
+ if (!(FlagsSetInAll & UnsafeRegsDead)) {
+ // Erase every candidate that violates the restrictions above. (It could be
+ // true that we have viable candidates, so it's not worth bailing out in
+    // the case that, say, 1 out of 20 candidates violate the restrictions.)
+ RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
+ RepeatedSequenceLocs.end(),
+ CantGuaranteeValueAcrossCall),
+ RepeatedSequenceLocs.end());
+
+ // If the sequence doesn't have enough candidates left, then we're done.
+ if (RepeatedSequenceLocs.size() < 2)
+ return outliner::OutlinedFunction();
+ }
+
+ // At this point, we have only "safe" candidates to outline. Figure out
+ // frame + call instruction information.
+
+ unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
+
+ // Helper lambda which sets call information for every candidate.
+ auto SetCandidateCallInfo =
+ [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
+ for (outliner::Candidate &C : RepeatedSequenceLocs)
+ C.setCallInfo(CallID, NumBytesForCall);
+ };
+
+ OutlinerCosts Costs(Subtarget);
+ unsigned FrameID = 0;
+ unsigned NumBytesToCreateFrame = 0;
+
+ // If the last instruction in any candidate is a terminator, then we should
+ // tail call all of the candidates.
+ if (RepeatedSequenceLocs[0].back()->isTerminator()) {
+ FrameID = MachineOutlinerTailCall;
+ NumBytesToCreateFrame = Costs.FrameTailCall;
+ SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
+ } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
+ LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr ||
+ LastInstrOpcode == ARM::tBLXi) {
+ FrameID = MachineOutlinerThunk;
+ NumBytesToCreateFrame = Costs.FrameThunk;
+ SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
+ } else {
+ // We need to decide how to emit calls + frames. We can always emit the same
+ // frame if we don't need to save to the stack.
+ unsigned NumBytesNoStackCalls = 0;
+ std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
+
+ for (outliner::Candidate &C : RepeatedSequenceLocs) {
+ C.initLRU(TRI);
+
+ // Is LR available? If so, we don't need a save.
+ if (C.LRU.available(ARM::LR)) {
+ FrameID = MachineOutlinerNoLRSave;
+ NumBytesNoStackCalls += Costs.CallNoLRSave;
+ C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // Is an unused register available? If so, we won't modify the stack, so
+ // we can outline with the same frame type as those that don't save LR.
+ else if (findRegisterToSaveLRTo(C)) {
+ FrameID = MachineOutlinerRegSave;
+ NumBytesNoStackCalls += Costs.CallRegSave;
+ C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+ }
+
+ if (!CandidatesWithoutStackFixups.empty()) {
+ RepeatedSequenceLocs = CandidatesWithoutStackFixups;
+ } else
+ return outliner::OutlinedFunction();
+ }
+
+ return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
+ NumBytesToCreateFrame, FrameID);
+}
+
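
Roughly, the generic outliner turns these numbers into an expected size saving of Occurrences * SequenceSize - (SequenceSize + FrameOverhead) - Occurrences * CallOverhead (a simplification of outliner::OutlinedFunction::getBenefit). As a worked example with the costs above: three occurrences of a 12-byte tail-called sequence (4-byte calls, 0-byte frame) save 3*12 - (12 + 0) - 3*4 = 12 bytes.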
+bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
+ MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
+ const Function &F = MF.getFunction();
+
+ // Can F be deduplicated by the linker? If it can, don't outline from it.
+ if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
+ return false;
+
+ // Don't outline from functions with section markings; the program could
+ // expect that all the code is in the named section.
+ // FIXME: Allow outlining from multiple functions with the same section
+ // marking.
+ if (F.hasSection())
+ return false;
+
+ // FIXME: Thumb1 outlining is not handled
+ if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction())
+ return false;
+
+ // It's safe to outline from MF.
+ return true;
+}
+
+bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const {
+ // Check if LR is available through all of the MBB. If it's not, then set
+ // a flag.
+ assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
+ "Suitable Machine Function for outlining must track liveness");
+
+ LiveRegUnits LRU(getRegisterInfo());
+
+ std::for_each(MBB.rbegin(), MBB.rend(),
+ [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
+
+ // Check if each of the unsafe registers are available...
+ bool R12AvailableInBlock = LRU.available(ARM::R12);
+ bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
+
+ // If all of these are dead (and not live out), we know we don't have to check
+ // them later.
+ if (R12AvailableInBlock && CPSRAvailableInBlock)
+ Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
+
+ // Now, add the live outs to the set.
+ LRU.addLiveOuts(MBB);
+
+ // If any of these registers is available in the MBB, but also a live out of
+ // the block, then we know outlining is unsafe.
+ if (R12AvailableInBlock && !LRU.available(ARM::R12))
+ return false;
+ if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
+ return false;
+
+ // Check if there's a call inside this MachineBasicBlock. If there is, then
+ // set a flag.
+ if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
+ Flags |= MachineOutlinerMBBFlags::HasCalls;
+
+ if (!LRU.available(ARM::LR))
+ Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
+
+ return true;
+}
+
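
The two-phase check in isMBBSafeToOutlineFrom relies on LiveRegUnits from llvm/CodeGen/LiveRegUnits.h. A minimal sketch of the idea, assuming accumulate() merely unions the units an instruction touches and addLiveOuts() adds the block's live-outs:

    #include "llvm/CodeGen/LiveRegUnits.h"
    using namespace llvm;

    // TRI is the target's TargetRegisterInfo.
    LiveRegUnits LRU(TRI);
    for (MachineInstr &MI : reverse(MBB))
      LRU.accumulate(MI);            // phase 1: everything the block touches
    bool Untouched = LRU.available(ARM::R12);
    LRU.addLiveOuts(MBB);            // phase 2: fold in the live-outs
    // Untouched inside the block but unavailable now means live across the
    // block, which is exactly the unsafe case the function rejects.
    bool LiveAcross = Untouched && !LRU.available(ARM::R12);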
+outliner::InstrType
+ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
+ unsigned Flags) const {
+ MachineInstr &MI = *MIT;
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ // Be conservative with inline ASM
+ if (MI.isInlineAsm())
+ return outliner::InstrType::Illegal;
+
+ // Don't allow debug values to impact outlining type.
+ if (MI.isDebugInstr() || MI.isIndirectDebugValue())
+ return outliner::InstrType::Invisible;
+
+ // At this point, KILL or IMPLICIT_DEF instructions don't really tell us much
+ // so we can go ahead and skip over them.
+ if (MI.isKill() || MI.isImplicitDef())
+ return outliner::InstrType::Invisible;
+
+ // PIC instructions contain labels, outlining them would break offset
+  // computing.
+ unsigned Opc = MI.getOpcode();
+ if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
+ Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
+ Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
+ Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
+ Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
+ Opc == ARM::t2MOV_ga_pcrel)
+ return outliner::InstrType::Illegal;
+
+ // Be conservative with ARMv8.1 MVE instructions.
+ if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
+ Opc == ARM::t2WhileLoopStart || Opc == ARM::t2LoopDec ||
+ Opc == ARM::t2LoopEnd)
+ return outliner::InstrType::Illegal;
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ uint64_t MIFlags = MCID.TSFlags;
+ if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
+ return outliner::InstrType::Illegal;
+
+ // Is this a terminator for a basic block?
+ if (MI.isTerminator()) {
+ // Don't outline if the branch is not unconditional.
+ if (isPredicated(MI))
+ return outliner::InstrType::Illegal;
+
+ // Is this the end of a function?
+ if (MI.getParent()->succ_empty())
+ return outliner::InstrType::Legal;
+
+ // It's not, so don't outline it.
+ return outliner::InstrType::Illegal;
+ }
+
+ // Make sure none of the operands are un-outlinable.
+ for (const MachineOperand &MOP : MI.operands()) {
+ if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+ MOP.isTargetIndex())
+ return outliner::InstrType::Illegal;
+ }
+
+ // Don't outline if link register or program counter value are used.
+ if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
+ return outliner::InstrType::Illegal;
+
+ if (MI.isCall()) {
+ // If we don't know anything about the callee, assume it depends on the
+ // stack layout of the caller. In that case, it's only legal to outline
+ // as a tail-call. Explicitly list the call instructions we know about so
+ // we don't get unexpected results with call pseudo-instructions.
+ auto UnknownCallOutlineType = outliner::InstrType::Illegal;
+ if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
+ Opc == ARM::tBLXr || Opc == ARM::tBLXi)
+ UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
+
+ return UnknownCallOutlineType;
+ }
+
+ // Since calls are handled, don't touch LR or PC
+ if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
+ return outliner::InstrType::Illegal;
+
+ // Does this use the stack?
+ if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
+ // True if there is no chance that any outlined candidate from this range
+ // could require stack fixups. That is, both
+ // * LR is available in the range (No save/restore around call)
+ // * The range doesn't include calls (No save/restore in outlined frame)
+ // are true.
+ // FIXME: This is very restrictive; the flags check the whole block,
+ // not just the bit we will try to outline.
+ bool MightNeedStackFixUp =
+ (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
+ MachineOutlinerMBBFlags::HasCalls));
+
+ if (!MightNeedStackFixUp)
+ return outliner::InstrType::Legal;
+
+ return outliner::InstrType::Illegal;
+ }
+
+ // Be conservative with IT blocks.
+ if (MI.readsRegister(ARM::ITSTATE, TRI) ||
+ MI.modifiesRegister(ARM::ITSTATE, TRI))
+ return outliner::InstrType::Illegal;
+
+ // Don't outline positions.
+ if (MI.isPosition())
+ return outliner::InstrType::Illegal;
+
+ return outliner::InstrType::Legal;
+}
+
+void ARMBaseInstrInfo::buildOutlinedFrame(
+ MachineBasicBlock &MBB, MachineFunction &MF,
+ const outliner::OutlinedFunction &OF) const {
+ // Nothing is needed for tail-calls.
+ if (OF.FrameConstructionID == MachineOutlinerTailCall)
+ return;
+
+ // For thunk outlining, rewrite the last instruction from a call to a
+ // tail-call.
+ if (OF.FrameConstructionID == MachineOutlinerThunk) {
+ MachineInstr *Call = &*--MBB.instr_end();
+ bool isThumb = Subtarget.isThumb();
+ unsigned FuncOp = isThumb ? 2 : 0;
+ unsigned Opc = Call->getOperand(FuncOp).isReg()
+ ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
+ : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
+ : ARM::tTAILJMPdND
+ : ARM::TAILJMPd;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
+ .add(Call->getOperand(FuncOp));
+ if (isThumb && !Call->getOperand(FuncOp).isReg())
+ MIB.add(predOps(ARMCC::AL));
+ Call->eraseFromParent();
+ return;
+ }
+
+ // Here we have to insert the return ourselves. Get the correct opcode from
+ // current feature set.
+ BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
+ .add(predOps(ARMCC::AL));
+}
+
+MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
+ Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+ MachineFunction &MF, const outliner::Candidate &C) const {
+ MachineInstrBuilder MIB;
+ MachineBasicBlock::iterator CallPt;
+ unsigned Opc;
+ bool isThumb = Subtarget.isThumb();
+
+ // Are we tail calling?
+ if (C.CallConstructionID == MachineOutlinerTailCall) {
+ // If yes, then we can just branch to the label.
+ Opc = isThumb
+ ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
+ : ARM::TAILJMPd;
+ MIB = BuildMI(MF, DebugLoc(), get(Opc))
+ .addGlobalAddress(M.getNamedValue(MF.getName()));
+ if (isThumb)
+ MIB.add(predOps(ARMCC::AL));
+ It = MBB.insert(It, MIB);
+ return It;
+ }
+
+ // Create the call instruction.
+ Opc = isThumb ? ARM::tBL : ARM::BL;
+ MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
+ if (isThumb)
+ CallMIB.add(predOps(ARMCC::AL));
+ CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
+
+ // Can we save to a register?
+ if (C.CallConstructionID == MachineOutlinerRegSave) {
+ unsigned Reg = findRegisterToSaveLRTo(C);
+ assert(Reg != 0 && "No callee-saved register available?");
+
+ // Save and restore LR from that register.
+ if (!MBB.isLiveIn(ARM::LR))
+ MBB.addLiveIn(ARM::LR);
+ copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
+ CallPt = MBB.insert(It, CallMIB);
+ copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
+ It--;
+ return CallPt;
+ }
+ // Insert the call.
+ It = MBB.insert(It, CallMIB);
+ return It;
+}
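
For the MachineOutlinerRegSave path the emitted sequence mirrors the table in the doc comment earlier in this patch; illustratively (the choice of r4 is hypothetical, standing in for whatever findRegisterToSaveLRTo returns):

    mov r4, lr              @ copyPhysReg: save LR
    bl  OUTLINED_FUNCTION_0
    mov lr, r4              @ copyPhysReg: restore LR

Note that the returned iterator (CallPt) points at the BL itself, so callers see the call instruction rather than the restore.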