aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/A15SDOptimizer.cpp74
-rw-r--r--lib/Target/ARM/ARM.h4
-rw-r--r--lib/Target/ARM/ARM.td218
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp134
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.h1
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp1309
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h131
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp41
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h12
-rw-r--r--lib/Target/ARM/ARMCallingConv.h2
-rw-r--r--lib/Target/ARM/ARMCallingConv.td52
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp160
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp25
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h11
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp283
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp143
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp162
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h2
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.cpp3
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp1043
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp1484
-rw-r--r--lib/Target/ARM/ARMISelLowering.h99
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td212
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp22
-rw-r--r--lib/Target/ARM/ARMInstrInfo.h3
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td355
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td157
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td346
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td430
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp265
-rw-r--r--lib/Target/ARM/ARMMCInstLower.cpp43
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h1
-rw-r--r--lib/Target/ARM/ARMOptimizeBarriersPass.cpp8
-rw-r--r--lib/Target/ARM/ARMSchedule.td14
-rw-r--r--lib/Target/ARM/ARMScheduleA8.td2
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td4
-rw-r--r--lib/Target/ARM/ARMScheduleSwift.td2
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp58
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.h42
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp157
-rw-r--r--lib/Target/ARM/ARMSubtarget.h273
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp54
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h31
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.h8
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp42
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.h17
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp309
-rw-r--r--lib/Target/ARM/AsmParser/Makefile15
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp108
-rw-r--r--lib/Target/ARM/Disassembler/Makefile16
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp120
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h9
-rw-r--r--lib/Target/ARM/InstPrinter/Makefile15
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h51
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp83
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h11
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h8
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h3
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h1
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h14
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp19
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h10
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp3
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp120
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp16
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp9
-rw-r--r--lib/Target/ARM/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp6
-rw-r--r--lib/Target/ARM/Makefile24
-rw-r--r--lib/Target/ARM/README.txt18
-rw-r--r--lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp1
-rw-r--r--lib/Target/ARM/TargetInfo/Makefile15
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp56
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.h2
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp19
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.h8
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp11
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp55
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h12
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp68
-rw-r--r--lib/Target/ARM/ThumbRegisterInfo.cpp120
-rw-r--r--lib/Target/ARM/ThumbRegisterInfo.h5
85 files changed, 6142 insertions, 3183 deletions
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 7a1865ce5fd6..9228cc2d7a9c 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -68,34 +68,31 @@ namespace {
//
unsigned createDupLane(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
- DebugLoc DL,
- unsigned Reg, unsigned Lane,
- bool QPR=false);
+ const DebugLoc &DL, unsigned Reg, unsigned Lane,
+ bool QPR = false);
unsigned createExtractSubreg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
- DebugLoc DL,
- unsigned DReg, unsigned Lane,
- const TargetRegisterClass *TRC);
+ const DebugLoc &DL, unsigned DReg,
+ unsigned Lane, const TargetRegisterClass *TRC);
unsigned createVExt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
- DebugLoc DL,
- unsigned Ssub0, unsigned Ssub1);
+ const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1);
unsigned createRegSequence(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
- DebugLoc DL,
- unsigned Reg1, unsigned Reg2);
+ const DebugLoc &DL, unsigned Reg1,
+ unsigned Reg2);
unsigned createInsertSubreg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
- DebugLoc DL, unsigned DReg, unsigned Lane,
- unsigned ToInsert);
+ const DebugLoc &DL, unsigned DReg,
+ unsigned Lane, unsigned ToInsert);
unsigned createImplicitDef(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
- DebugLoc DL);
+ const DebugLoc &DL);
//
// Various property checkers
@@ -426,11 +423,10 @@ SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
}
// Creates a DPR register from an SPR one by using a VDUP.
-unsigned
-A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore,
- DebugLoc DL,
- unsigned Reg, unsigned Lane, bool QPR) {
+unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const DebugLoc &DL, unsigned Reg,
+ unsigned Lane, bool QPR) {
unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
&ARM::DPRRegClass);
AddDefaultPred(BuildMI(MBB,
@@ -445,12 +441,10 @@ A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
}
// Creates a SPR register from a DPR by copying the value in lane 0.
-unsigned
-A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore,
- DebugLoc DL,
- unsigned DReg, unsigned Lane,
- const TargetRegisterClass *TRC) {
+unsigned A15SDOptimizer::createExtractSubreg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ const DebugLoc &DL, unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC) {
unsigned Out = MRI->createVirtualRegister(TRC);
BuildMI(MBB,
InsertBefore,
@@ -462,11 +456,9 @@ A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB,
}
// Takes two SPR registers and creates a DPR by using a REG_SEQUENCE.
-unsigned
-A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore,
- DebugLoc DL,
- unsigned Reg1, unsigned Reg2) {
+unsigned A15SDOptimizer::createRegSequence(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ const DebugLoc &DL, unsigned Reg1, unsigned Reg2) {
unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
BuildMI(MBB,
InsertBefore,
@@ -481,11 +473,10 @@ A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB,
// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
// and merges them into one DPR register.
-unsigned
-A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore,
- DebugLoc DL,
- unsigned Ssub0, unsigned Ssub1) {
+unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const DebugLoc &DL, unsigned Ssub0,
+ unsigned Ssub1) {
unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
AddDefaultPred(BuildMI(MBB,
InsertBefore,
@@ -497,11 +488,9 @@ A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
return Out;
}
-unsigned
-A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore,
- DebugLoc DL, unsigned DReg, unsigned Lane,
- unsigned ToInsert) {
+unsigned A15SDOptimizer::createInsertSubreg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ const DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) {
unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
BuildMI(MBB,
InsertBefore,
@@ -517,7 +506,7 @@ A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB,
unsigned
A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
- DebugLoc DL) {
+ const DebugLoc &DL) {
unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
BuildMI(MBB,
InsertBefore,
@@ -681,6 +670,9 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
}
bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(*Fn.getFunction()))
+ return false;
+
const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
// Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
// enabled when NEON is available.
@@ -701,7 +693,7 @@ bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end();
MI != ME;) {
- Modified |= runOnInstruction(MI++);
+ Modified |= runOnInstruction(&*MI++);
}
}
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index cd7540e52410..690ff86a0c86 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -27,6 +27,7 @@ class FunctionPass;
class ImmutablePass;
class MachineInstr;
class MCInst;
+class PassRegistry;
class TargetLowering;
class TargetMachine;
@@ -45,6 +46,9 @@ FunctionPass *createThumb2SizeReductionPass(
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
+void initializeARMLoadStoreOptPass(PassRegistry &);
+void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+
} // end namespace llvm;
#endif
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index c171656b48ab..ef626b66a1e7 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -76,6 +76,11 @@ def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
"Enable Thumb2 extract and pack instructions">;
def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
"Has data barrier (dmb / dsb) instructions">;
+def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true",
+ "Has v7 clrex instruction">;
+def FeatureAcquireRelease : SubtargetFeature<"acquire-release",
+ "HasAcquireRelease", "true",
+ "Has v8 acquire/release (lda/ldaex etc) instructions">;
def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
"FP compare + branch is slow">;
def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
@@ -84,17 +89,98 @@ def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable support for Performance Monitor extensions">;
def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
"Enable support for TrustZone security extensions">;
+def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true",
+ "Enable support for ARMv8-M Security Extensions">;
def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
"Enable support for Cryptography extensions",
[FeatureNEON]>;
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable support for CRC instructions">;
+// Not to be confused with FeatureHasRetAddrStack (return address stack)
+def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
+ "Enable Reliability, Availability and Serviceability extensions">;
+
// Cyclone has preferred instructions for zeroing VFP registers, which can
// execute in 0 cycles.
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;
+// Whether or not it may be profitable to unpredicate certain instructions
+// during if conversion.
+def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr",
+ "IsProfitableToUnpredicate",
+ "true",
+ "Is profitable to unpredicate">;
+
+// Some targets (e.g. Swift) have microcoded VGETLNi32.
+def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32",
+ "HasSlowVGETLNi32", "true",
+ "Has slow VGETLNi32 - prefer VMOV">;
+
+// Some targets (e.g. Swift) have microcoded VDUP32.
+def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true",
+ "Has slow VDUP32 - prefer VMOV">;
+
+// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON
+// for scalar FP, as this allows more effective execution domain optimization.
+def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR",
+ "true", "Prefer VMOVSR">;
+
+// Swift has ISHST barriers compatible with Atomic Release semantics but weaker
+// than ISH
+def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST",
+ "true", "Prefer ISHST barriers">;
+
+// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU.
+def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", "true",
+ "Has muxed AGU and NEON/FPU">;
+
+// On some targets, a VLDM/VSTM starting with an odd register number needs more
+// microops than single VLDRS.
+def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister",
+ "true", "VLDM/VSTM starting with an odd register is slow">;
+
+// Some targets have a renaming dependency when loading into D subregisters.
+def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
+ "SlowLoadDSubregister", "true",
+ "Loading into D subregs is slow">;
+// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.
+def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
+ "DontWidenVMOVS", "true",
+ "Don't widen VMOVS to VMOVD">;
+
+// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions.
+def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", "ExpandMLx", "true",
+ "Expand VFP/NEON MLA/MLS instructions">;
+
+// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS.
+def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards",
+ "true", "Has VMLx hazards">;
+
+// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
+// VFP to NEON, as an execution domain optimization.
+def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs",
+ "true", "Convert VMOVSR, VMOVRS, VMOVS to NEON">;
+
+// Some processors benefit from using NEON instructions for scalar
+// single-precision FP operations. This affects instruction selection and should
+// only be enabled if the handling of denormals is not important.
+def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
+ "true",
+ "Use NEON for single precision FP">;
+
+// On some processors, VLDn instructions that access unaligned data take one
+// extra cycle. Take that into account when computing operand latencies.
+def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign",
+ "true",
+ "Check for VLDn unaligned access">;
+
+// Some processors have a nonpipelined VFP coprocessor.
+def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp",
+ "NonpipelinedVFP", "true",
+ "VFP instructions are not pipelined">;
+
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
// to just not use them.
@@ -106,12 +192,6 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
"HasVMLxForwarding", "true",
"Has multiplier accumulator forwarding">;
-// Some processors benefit from using NEON instructions for scalar
-// single-precision FP operations.
-def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
- "true",
- "Use NEON for single precision FP">;
-
// Disable 32-bit to 16-bit narrowing for experimentation.
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
"Prefer 32-bit Thumb instrs">;
@@ -130,7 +210,7 @@ def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
// Some processors perform return stack prediction. CodeGen should avoid issue
// "normal" call instructions to callees which do not return.
-def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
+def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", "HasRetAddrStack", "true",
"Has return address stack">;
/// DSP extension.
@@ -200,24 +280,31 @@ def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true",
"Support ARM v6M instructions",
[HasV6Ops]>;
+def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true",
+ "Support ARM v8M Baseline instructions",
+ [HasV6MOps]>;
def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true",
"Support ARM v6k instructions",
[HasV6Ops]>;
def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
"Support ARM v6t2 instructions",
- [HasV6MOps, HasV6KOps, FeatureThumb2]>;
+ [HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>;
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
- [HasV6T2Ops, FeaturePerfMon]>;
+ [HasV6T2Ops, FeaturePerfMon,
+ FeatureV7Clrex]>;
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
- [HasV7Ops]>;
+ [HasV7Ops, FeatureAcquireRelease]>;
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
[HasV8Ops]>;
def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
"Support ARM v8.2a instructions",
[HasV8_1aOps]>;
+def HasV8MMainlineOps : SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true",
+ "Support ARM v8M Mainline instructions",
+ [HasV7Ops]>;
//===----------------------------------------------------------------------===//
@@ -238,6 +325,8 @@ def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
"Cortex-A15 ARM processors", []>;
def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17",
"Cortex-A17 ARM processors", []>;
+def ProcA32 : SubtargetFeature<"a32", "ARMProcFamily", "CortexA32",
+ "Cortex-A32 ARM processors", []>;
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors", []>;
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
@@ -246,6 +335,8 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
"Cortex-A57 ARM processors", []>;
def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
"Cortex-A72 ARM processors", []>;
+def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
+ "Cortex-A73 ARM processors", []>;
def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
"Qualcomm ARM processors", []>;
@@ -256,12 +347,14 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M1 processors", []>;
def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
- "Cortex-R4 ARM processors", []>;
+ "Cortex-R4 ARM processors", []>;
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors", []>;
def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7",
"Cortex-R7 ARM processors", []>;
+def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3",
+ "Cortex-M3 ARM processors", []>;
//===----------------------------------------------------------------------===//
// ARM schedules.
@@ -374,7 +467,27 @@ def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps,
FeatureMP,
FeatureVirtualization,
FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeatureRAS]>;
+
+def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline",
+ [HasV8MBaselineOps,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureHWDiv,
+ FeatureV7Clrex,
+ Feature8MSecExt,
+ FeatureAcquireRelease,
+ FeatureMClass]>;
+
+def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline",
+ [HasV8MMainlineOps,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureHWDiv,
+ Feature8MSecExt,
+ FeatureAcquireRelease,
+ FeatureMClass]>;
// Aliases
def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>;
@@ -452,7 +565,7 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2,
// FIXME: A5 has currently the same Schedule model as A8
def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
FeatureTrustZone,
FeatureSlowFPBrcc,
FeatureHasSlowFPVMLx,
@@ -462,9 +575,10 @@ def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5,
FeatureVFP4]>;
def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
FeatureTrustZone,
FeatureSlowFPBrcc,
+ FeatureHasVMLxHazards,
FeatureHasSlowFPVMLx,
FeatureVMLxForwarding,
FeatureT2XtPk,
@@ -475,25 +589,33 @@ def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
FeatureVirtualization]>;
def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
+ FeatureNonpipelinedVFP,
FeatureTrustZone,
FeatureSlowFPBrcc,
+ FeatureHasVMLxHazards,
FeatureHasSlowFPVMLx,
FeatureVMLxForwarding,
FeatureT2XtPk]>;
def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
FeatureTrustZone,
+ FeatureHasVMLxHazards,
FeatureVMLxForwarding,
FeatureT2XtPk,
FeatureFP16,
FeatureAvoidPartialCPSR,
+ FeatureExpandMLx,
+ FeaturePreferVMOVSR,
+ FeatureMuxedUnits,
+ FeatureNEONForFPMovs,
+ FeatureCheckVLDnAlign,
FeatureMP]>;
// FIXME: A12 has currently the same Schedule model as A9
def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
FeatureTrustZone,
FeatureVMLxForwarding,
FeatureT2XtPk,
@@ -506,11 +628,14 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
// FIXME: A15 has currently the same Schedule model as A9.
def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
- FeatureHasRAS,
+ FeatureDontWidenVMOVS,
+ FeatureHasRetAddrStack,
+ FeatureMuxedUnits,
FeatureTrustZone,
FeatureT2XtPk,
FeatureVFP4,
FeatureMP,
+ FeatureCheckVLDnAlign,
FeatureHWDiv,
FeatureHWDivARM,
FeatureAvoidPartialCPSR,
@@ -518,7 +643,7 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
// FIXME: A17 has currently the same Schedule model as A9
def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
FeatureTrustZone,
FeatureMP,
FeatureVMLxForwarding,
@@ -533,7 +658,9 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
// FIXME: krait has currently the same features as A9 plus VFP4 and hardware
// division features.
def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
+ FeatureMuxedUnits,
+ FeatureCheckVLDnAlign,
FeatureVMLxForwarding,
FeatureT2XtPk,
FeatureFP16,
@@ -543,7 +670,7 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
FeatureHWDivARM]>;
def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
FeatureNEONForFP,
FeatureT2XtPk,
FeatureVFP4,
@@ -552,17 +679,24 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
- FeatureHasSlowFPVMLx]>;
+ FeatureHasSlowFPVMLx,
+ FeatureHasVMLxHazards,
+ FeatureProfUnpredicate,
+ FeaturePrefISHSTBarrier,
+ FeatureSlowOddRegister,
+ FeatureSlowLoadDSubreg,
+ FeatureSlowVGETLNi32,
+ FeatureSlowVDUP32]>;
// FIXME: R4 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
FeatureAvoidPartialCPSR,
FeatureT2XtPk]>;
// FIXME: R4F has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
FeatureSlowFPBrcc,
FeatureHasSlowFPVMLx,
FeatureVFP3,
@@ -572,7 +706,7 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
// FIXME: R5 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
FeatureVFP3,
FeatureD16,
FeatureSlowFPBrcc,
@@ -583,9 +717,20 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
+ FeatureVFP3,
+ FeatureD16,
+ FeatureFP16,
+ FeatureMP,
+ FeatureSlowFPBrcc,
+ FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
+
+def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
+ FeatureHasRetAddrStack,
FeatureVFP3,
- FeatureVFPOnlySP,
FeatureD16,
FeatureFP16,
FeatureMP,
@@ -595,8 +740,8 @@ def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
FeatureAvoidPartialCPSR,
FeatureT2XtPk]>;
-def : ProcNoItin<"cortex-m3", [ARMv7m]>;
-def : ProcNoItin<"sc300", [ARMv7m]>;
+def : ProcNoItin<"cortex-m3", [ARMv7m, ProcM3]>;
+def : ProcNoItin<"sc300", [ARMv7m, ProcM3]>;
def : ProcNoItin<"cortex-m4", [ARMv7em,
FeatureVFP4,
@@ -607,6 +752,12 @@ def : ProcNoItin<"cortex-m7", [ARMv7em,
FeatureFPARMv8,
FeatureD16]>;
+def : ProcNoItin<"cortex-a32", [ARMv8a,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35,
FeatureHWDiv,
@@ -636,9 +787,16 @@ def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
FeatureCrypto,
FeatureCRC]>;
+def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
// Cyclone is very similar to swift
def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
- FeatureHasRAS,
+ FeatureHasRetAddrStack,
FeatureNEONForFP,
FeatureT2XtPk,
FeatureVFP4,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 206db9619a2f..04863a7ecf8f 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -43,12 +43,11 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ARMBuildAttributes.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Support/COFF.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -213,8 +212,6 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
GetARMGVSymbol(GV, TF)->print(O, MAI);
printOffset(MO.getOffset(), O);
- if (TF == ARMII::MO_PLT)
- O << "(PLT)";
break;
}
case MachineOperand::MO_ConstantPoolIndex:
@@ -516,9 +513,10 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer->AddBlankLine();
}
- Stubs = MMIMacho.GetHiddenGVStubList();
+ Stubs = MMIMacho.GetThreadLocalGVStubList();
if (!Stubs.empty()) {
- OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer->SwitchSection(TLOFMacho.getThreadLocalPointerSection());
EmitAlignment(2);
for (auto &Stub : Stubs)
@@ -536,18 +534,48 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
+ if (TT.isOSBinFormatCOFF()) {
+ const auto &TLOF =
+ static_cast<const TargetLoweringObjectFileCOFF &>(getObjFileLowering());
+
+ std::string Flags;
+ raw_string_ostream OS(Flags);
+
+ for (const auto &Function : M)
+ TLOF.emitLinkerFlagsForGlobal(OS, &Function, *Mang);
+ for (const auto &Global : M.globals())
+ TLOF.emitLinkerFlagsForGlobal(OS, &Global, *Mang);
+ for (const auto &Alias : M.aliases())
+ TLOF.emitLinkerFlagsForGlobal(OS, &Alias, *Mang);
+
+ OS.flush();
+
+ // Output collected flags
+ if (!Flags.empty()) {
+ OutStreamer->SwitchSection(TLOF.getDrectveSection());
+ OutStreamer->EmitBytes(Flags);
+ }
+ }
+
// The last attribute to be emitted is ABI_optimization_goals
MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
if (OptimizationGoals > 0 &&
- (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI()))
+ (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
+ Subtarget->isTargetMuslAEABI()))
ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals);
OptimizationGoals = -1;
ATS.finishAttributeSection();
}
+static bool isV8M(const ARMSubtarget *Subtarget) {
+ // Note that v8M Baseline is a subset of v6T2!
+ return (Subtarget->hasV8MBaselineOps() && !Subtarget->hasV6T2Ops()) ||
+ Subtarget->hasV8MMainlineOps();
+}
+
//===----------------------------------------------------------------------===//
// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
// FIXME:
@@ -561,13 +589,17 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
return ARMBuildAttrs::v5TEJ;
if (Subtarget->hasV8Ops())
- return ARMBuildAttrs::v8;
+ return ARMBuildAttrs::v8_A;
+ else if (Subtarget->hasV8MMainlineOps())
+ return ARMBuildAttrs::v8_M_Main;
else if (Subtarget->hasV7Ops()) {
if (Subtarget->isMClass() && Subtarget->hasDSP())
return ARMBuildAttrs::v7E_M;
return ARMBuildAttrs::v7;
} else if (Subtarget->hasV6T2Ops())
return ARMBuildAttrs::v6T2;
+ else if (Subtarget->hasV8MBaselineOps())
+ return ARMBuildAttrs::v8_M_Base;
else if (Subtarget->hasV6MOps())
return ARMBuildAttrs::v6S_M;
else if (Subtarget->hasV6Ops())
@@ -609,9 +641,9 @@ void ARMAsmPrinter::emitAttributes() {
static_cast<const ARMBaseTargetMachine &>(TM);
const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian());
- std::string CPUString = STI.getCPUString();
+ const std::string &CPUString = STI.getCPUString();
- if (CPUString.find("generic") != 0) { //CPUString doesn't start with "generic"
+ if (!StringRef(CPUString).startswith("generic")) {
// FIXME: remove krait check when GNU tools support krait cpu
if (STI.isKrait()) {
ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
@@ -627,7 +659,7 @@ void ARMAsmPrinter::emitAttributes() {
// Tag_CPU_arch_profile must have the default value of 0 when "Architecture
// profile is not applicable (e.g. pre v7, or cross-profile code)".
- if (STI.hasV7Ops()) {
+ if (STI.hasV7Ops() || isV8M(&STI)) {
if (STI.isAClass()) {
ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
ARMBuildAttrs::ApplicationProfile);
@@ -643,7 +675,10 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use,
STI.hasARMOps() ? ARMBuildAttrs::Allowed
: ARMBuildAttrs::Not_Allowed);
- if (STI.isThumb1Only()) {
+ if (isV8M(&STI)) {
+ ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumbDerived);
+ } else if (STI.isThumb1Only()) {
ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
} else if (STI.hasThumb2()) {
ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
@@ -690,7 +725,7 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitFPU(ARM::FK_VFPV2);
}
- if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (isPositionIndependent()) {
// PIC specific attributes.
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data,
ARMBuildAttrs::AddressRWPCRel);
@@ -794,6 +829,9 @@ void ARMAsmPrinter::emitAttributes() {
if (STI.hasDivideInARMMode() && !STI.hasV8Ops())
ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
+ if (STI.hasDSP() && isV8M(&STI))
+ ATS.emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed);
+
if (MMI) {
if (const Module *SourceModule = MMI->getModule()) {
// ABI_PCS_wchar_t to indicate wchar_t width
@@ -853,11 +891,18 @@ static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
static MCSymbolRefExpr::VariantKind
getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
switch (Modifier) {
- case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
- case ARMCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD;
- case ARMCP::TPOFF: return MCSymbolRefExpr::VK_TPOFF;
- case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_GOTTPOFF;
- case ARMCP::GOT_PREL: return MCSymbolRefExpr::VK_ARM_GOT_PREL;
+ case ARMCP::no_modifier:
+ return MCSymbolRefExpr::VK_None;
+ case ARMCP::TLSGD:
+ return MCSymbolRefExpr::VK_TLSGD;
+ case ARMCP::TPOFF:
+ return MCSymbolRefExpr::VK_TPOFF;
+ case ARMCP::GOTTPOFF:
+ return MCSymbolRefExpr::VK_GOTTPOFF;
+ case ARMCP::GOT_PREL:
+ return MCSymbolRefExpr::VK_ARM_GOT_PREL;
+ case ARMCP::SECREL:
+ return MCSymbolRefExpr::VK_SECREL;
}
llvm_unreachable("Invalid ARMCPModifier!");
}
@@ -865,8 +910,8 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
unsigned char TargetFlags) {
if (Subtarget->isTargetMachO()) {
- bool IsIndirect = (TargetFlags & ARMII::MO_NONLAZY) &&
- Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+ bool IsIndirect =
+ (TargetFlags & ARMII::MO_NONLAZY) && Subtarget->isGVIndirectSymbol(GV);
if (!IsIndirect)
return getSymbol(GV);
@@ -876,8 +921,9 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
MachineModuleInfoMachO &MMIMachO =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
MachineModuleInfoImpl::StubValueTy &StubSym =
- GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym)
- : MMIMachO.getGVStubEntry(MCSym);
+ GV->isThreadLocal() ? MMIMachO.getThreadLocalGVStubEntry(MCSym)
+ : MMIMachO.getGVStubEntry(MCSym);
+
if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
!GV->hasInternalLinkage());
@@ -991,7 +1037,7 @@ void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) {
// .word (LBB1 - LJTI_0_0)
const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
- if (TM.getRelocationModel() == Reloc::PIC_)
+ if (isPositionIndependent())
Expr = MCBinaryExpr::createSub(Expr, MCSymbolRefExpr::create(JTISymbol,
OutContext),
OutContext);
@@ -1227,6 +1273,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const DataLayout &DL = getDataLayout();
+ MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
// If we just ended a constant pool, mark it as such.
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
@@ -1643,29 +1691,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Non-Darwin binutils don't yet support the "trap" mnemonic.
// FIXME: Remove this special case when they do.
if (!Subtarget->isTargetMachO()) {
- //.long 0xe7ffdefe @ trap
uint32_t Val = 0xe7ffdefeUL;
OutStreamer->AddComment("trap");
- OutStreamer->EmitIntValue(Val, 4);
+ ATS.emitInst(Val);
return;
}
break;
}
case ARM::TRAPNaCl: {
- //.long 0xe7fedef0 @ trap
uint32_t Val = 0xe7fedef0UL;
OutStreamer->AddComment("trap");
- OutStreamer->EmitIntValue(Val, 4);
+ ATS.emitInst(Val);
return;
}
case ARM::tTRAP: {
// Non-Darwin binutils don't yet support the "trap" mnemonic.
// FIXME: Remove this special case when they do.
if (!Subtarget->isTargetMachO()) {
- //.short 57086 @ trap
uint16_t Val = 0xdefe;
OutStreamer->AddComment("trap");
- OutStreamer->EmitIntValue(Val, 2);
+ ATS.emitInst(Val, 'n');
return;
}
break;
@@ -1845,6 +1890,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// bx $scratch
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ScratchReg = MI->getOperand(1).getReg();
+
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
.addReg(ScratchReg)
.addReg(SrcReg)
@@ -1885,6 +1931,36 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(0));
return;
}
+ case ARM::tInt_WIN_eh_sjlj_longjmp: {
+ // ldr.w r11, [$src, #0]
+ // ldr.w sp, [$src, #8]
+ // ldr.w pc, [$src, #4]
+
+ unsigned SrcReg = MI->getOperand(0).getReg();
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2LDRi12)
+ .addReg(ARM::R11)
+ .addReg(SrcReg)
+ .addImm(0)
+ // Predicate
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2LDRi12)
+ .addReg(ARM::SP)
+ .addReg(SrcReg)
+ .addImm(8)
+ // Predicate
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2LDRi12)
+ .addReg(ARM::PC)
+ .addReg(SrcReg)
+ .addImm(4)
+ // Predicate
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
}
MCInst TmpInst;
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index ed7be2de51ca..97f5ca0ecbc2 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -95,6 +95,7 @@ public:
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
private:
+
// Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
void emitAttributes();
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 49f328852667..693f16499717 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -51,15 +51,6 @@ static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
-static cl::opt<bool>
-WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
- cl::desc("Widen ARM vmovs to vmovd when possible"));
-
-static cl::opt<unsigned>
-SwiftPartialUpdateClearance("swift-partial-update-clearance",
- cl::Hidden, cl::init(12),
- cl::desc("Clearance before partial register updates"));
-
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
uint16_t MLxOpc; // MLA / MLS opcode
@@ -124,18 +115,15 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
-MachineInstr *
-ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineBasicBlock::iterator &MBBI,
- LiveVariables *LV) const {
+MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
+ MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
// FIXME: Thumb2 support.
if (!EnableARM3Addr)
return nullptr;
- MachineInstr *MI = MBBI;
- MachineFunction &MF = *MI->getParent()->getParent();
- uint64_t TSFlags = MI->getDesc().TSFlags;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ uint64_t TSFlags = MI.getDesc().TSFlags;
bool isPre = false;
switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
default: return nullptr;
@@ -148,24 +136,24 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// Try splitting an indexed load/store to an un-indexed one plus an add/sub
// operation.
- unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
if (MemOpc == 0)
return nullptr;
MachineInstr *UpdateMI = nullptr;
MachineInstr *MemMI = nullptr;
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
- const MCInstrDesc &MCID = MI->getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
unsigned NumOps = MCID.getNumOperands();
- bool isLoad = !MI->mayStore();
- const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
- const MachineOperand &Base = MI->getOperand(2);
- const MachineOperand &Offset = MI->getOperand(NumOps-3);
+ bool isLoad = !MI.mayStore();
+ const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
+ const MachineOperand &Base = MI.getOperand(2);
+ const MachineOperand &Offset = MI.getOperand(NumOps - 3);
unsigned WBReg = WB.getReg();
unsigned BaseReg = Base.getReg();
unsigned OffReg = Offset.getReg();
- unsigned OffImm = MI->getOperand(NumOps-2).getImm();
- ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
+ unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
switch (AddrMode) {
default: llvm_unreachable("Unknown indexed op!");
case ARMII::AddrMode2: {
@@ -176,22 +164,33 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// Can't encode it in a so_imm operand. This transformation will
// add more than 1 instruction. Abandon!
return nullptr;
- UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
- .addReg(BaseReg).addImm(Amt)
- .addImm(Pred).addReg(0).addReg(0);
+ .addReg(BaseReg)
+ .addImm(Amt)
+ .addImm(Pred)
+ .addReg(0)
+ .addReg(0);
} else if (Amt != 0) {
ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
- UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
- .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
- .addImm(Pred).addReg(0).addReg(0);
+ .addReg(BaseReg)
+ .addReg(OffReg)
+ .addReg(0)
+ .addImm(SOOpc)
+ .addImm(Pred)
+ .addReg(0)
+ .addReg(0);
} else
- UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
- .addReg(BaseReg).addReg(OffReg)
- .addImm(Pred).addReg(0).addReg(0);
+ .addReg(BaseReg)
+ .addReg(OffReg)
+ .addImm(Pred)
+ .addReg(0)
+ .addReg(0);
break;
}
case ARMII::AddrMode3 : {
@@ -199,15 +198,21 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Amt = ARM_AM::getAM3Offset(OffImm);
if (OffReg == 0)
// Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
- UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
- .addReg(BaseReg).addImm(Amt)
- .addImm(Pred).addReg(0).addReg(0);
+ .addReg(BaseReg)
+ .addImm(Amt)
+ .addImm(Pred)
+ .addReg(0)
+ .addReg(0);
else
- UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
- .addReg(BaseReg).addReg(OffReg)
- .addImm(Pred).addReg(0).addReg(0);
+ .addReg(BaseReg)
+ .addReg(OffReg)
+ .addImm(Pred)
+ .addReg(0)
+ .addReg(0);
break;
}
}
@@ -215,24 +220,34 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
std::vector<MachineInstr*> NewMIs;
if (isPre) {
if (isLoad)
- MemMI = BuildMI(MF, MI->getDebugLoc(),
- get(MemOpc), MI->getOperand(0).getReg())
- .addReg(WBReg).addImm(0).addImm(Pred);
+ MemMI =
+ BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
+ .addReg(WBReg)
+ .addImm(0)
+ .addImm(Pred);
else
- MemMI = BuildMI(MF, MI->getDebugLoc(),
- get(MemOpc)).addReg(MI->getOperand(1).getReg())
- .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
+ .addReg(MI.getOperand(1).getReg())
+ .addReg(WBReg)
+ .addReg(0)
+ .addImm(0)
+ .addImm(Pred);
NewMIs.push_back(MemMI);
NewMIs.push_back(UpdateMI);
} else {
if (isLoad)
- MemMI = BuildMI(MF, MI->getDebugLoc(),
- get(MemOpc), MI->getOperand(0).getReg())
- .addReg(BaseReg).addImm(0).addImm(Pred);
+ MemMI =
+ BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
+ .addReg(BaseReg)
+ .addImm(0)
+ .addImm(Pred);
else
- MemMI = BuildMI(MF, MI->getDebugLoc(),
- get(MemOpc)).addReg(MI->getOperand(1).getReg())
- .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
+ .addReg(MI.getOperand(1).getReg())
+ .addReg(BaseReg)
+ .addReg(0)
+ .addImm(0)
+ .addImm(Pred);
if (WB.isDead())
UpdateMI->getOperand(0).setIsDead();
NewMIs.push_back(UpdateMI);
@@ -241,8 +256,8 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// Transfer LiveVariables states, kill / dead info.
if (LV) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
unsigned Reg = MO.getReg();
@@ -250,7 +265,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (MO.isDef()) {
MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
if (MO.isDead())
- LV->addVirtualRegisterDead(Reg, NewMI);
+ LV->addVirtualRegisterDead(Reg, *NewMI);
}
if (MO.isUse() && MO.isKill()) {
for (unsigned j = 0; j < 2; ++j) {
@@ -258,7 +273,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr *NewMI = NewMIs[j];
if (!NewMI->readsRegister(Reg))
continue;
- LV->addVirtualRegisterKilled(Reg, NewMI);
+ LV->addVirtualRegisterKilled(Reg, *NewMI);
if (VI.removeKill(MI))
VI.Kills.push_back(NewMI);
break;
@@ -268,17 +283,18 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
}
+ MachineBasicBlock::iterator MBBI = MI.getIterator();
MFI->insert(MBBI, NewMIs[1]);
MFI->insert(MBBI, NewMIs[0]);
return NewMIs[0];
}
// Branch analysis.
-bool
-ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
+bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
TBB = nullptr;
FBB = nullptr;
@@ -289,7 +305,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
// Walk backwards from the end of the basic block until the branch is
// analyzed or we give up.
- while (isPredicated(I) || I->isTerminator() || I->isDebugValue()) {
+ while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
// Flag to be raised on unanalyzeable instructions. This is useful in cases
// where we want to clean up on the end of the basic block before we bail
@@ -322,7 +338,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
Cond.push_back(I->getOperand(2));
} else if (I->isReturn()) {
// Returns can't be analyzed, but we should run cleanup.
- CantAnalyze = !isPredicated(I);
+ CantAnalyze = !isPredicated(*I);
} else {
// We encountered other unrecognized terminator. Bail out immediately.
return true;
@@ -330,7 +346,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
// Cleanup code - to be run for unpredicated unconditional branches and
// returns.
- if (!isPredicated(I) &&
+ if (!isPredicated(*I) &&
(isUncondBranchOpcode(I->getOpcode()) ||
isIndirectBranchOpcode(I->getOpcode()) ||
isJumpTableBranchOpcode(I->getOpcode()) ||
@@ -344,9 +360,9 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
if (AllowModify) {
MachineBasicBlock::iterator DI = std::next(I);
while (DI != MBB.end()) {
- MachineInstr *InstToDelete = DI;
+ MachineInstr &InstToDelete = *DI;
++DI;
- InstToDelete->eraseFromParent();
+ InstToDelete.eraseFromParent();
}
}
}
@@ -390,11 +406,11 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-unsigned
-ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- ArrayRef<MachineOperand> Cond,
- DebugLoc DL) const {
+unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond,
+ const DebugLoc &DL) const {
ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
int BOpc = !AFI->isThumbFunction()
? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
@@ -438,10 +454,10 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
-bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
- if (MI->isBundle()) {
- MachineBasicBlock::const_instr_iterator I = MI->getIterator();
- MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
+ if (MI.isBundle()) {
+ MachineBasicBlock::const_instr_iterator I = MI.getIterator();
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
int PIdx = I->findFirstPredOperandIdx();
if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
@@ -450,26 +466,26 @@ bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
return false;
}
- int PIdx = MI->findFirstPredOperandIdx();
- return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+ int PIdx = MI.findFirstPredOperandIdx();
+ return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
}
-bool ARMBaseInstrInfo::
-PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Pred) const {
- unsigned Opc = MI->getOpcode();
+bool ARMBaseInstrInfo::PredicateInstruction(
+ MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
+ unsigned Opc = MI.getOpcode();
if (isUncondBranchOpcode(Opc)) {
- MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
- MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
.addReg(Pred[1].getReg());
return true;
}
- int PIdx = MI->findFirstPredOperandIdx();
+ int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1) {
- MachineOperand &PMO = MI->getOperand(PIdx);
+ MachineOperand &PMO = MI.getOperand(PIdx);
PMO.setImm(Pred[0].getImm());
- MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+ MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
return true;
}
return false;
@@ -501,11 +517,11 @@ bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
}
}
-bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const {
+bool ARMBaseInstrInfo::DefinesPredicate(
+ MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
bool Found = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
(MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
Pred.push_back(MO);
@@ -555,21 +571,21 @@ static bool isEligibleForITBlock(const MachineInstr *MI) {
/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
-bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
- if (!MI->isPredicable())
+bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
+ if (!MI.isPredicable())
return false;
- if (!isEligibleForITBlock(MI))
+ if (!isEligibleForITBlock(&MI))
return false;
ARMFunctionInfo *AFI =
- MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
if (AFI->isThumb2Function()) {
if (getSubtarget().restrictIT())
- return isV8EligibleForIT(MI);
+ return isV8EligibleForIT(&MI);
} else { // non-Thumb
- if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
+ if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
return false;
}
@@ -594,19 +610,19 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
/// GetInstSize - Return the size of the specified MachineInstr.
///
-unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- const MachineBasicBlock &MBB = *MI->getParent();
+unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
+ const MachineBasicBlock &MBB = *MI.getParent();
const MachineFunction *MF = MBB.getParent();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
- const MCInstrDesc &MCID = MI->getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
if (MCID.getSize())
return MCID.getSize();
// If this machine instr is an inline asm, measure it.
- if (MI->getOpcode() == ARM::INLINEASM)
- return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
- unsigned Opc = MI->getOpcode();
+ if (MI.getOpcode() == ARM::INLINEASM)
+ return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ unsigned Opc = MI.getOpcode();
switch (Opc) {
default:
// pseudo-instruction sizes are zero.
@@ -628,11 +644,13 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case ARM::JUMPTABLE_TBH:
// If this machine instr is a constant pool entry, its size is recorded as
// operand #2.
- return MI->getOperand(2).getImm();
+ return MI.getOperand(2).getImm();
case ARM::Int_eh_sjlj_longjmp:
return 16;
case ARM::tInt_eh_sjlj_longjmp:
return 10;
+ case ARM::tInt_WIN_eh_sjlj_longjmp:
+ return 12;
case ARM::Int_eh_sjlj_setjmp:
case ARM::Int_eh_sjlj_setjmp_nofp:
return 20;
@@ -641,17 +659,17 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case ARM::t2Int_eh_sjlj_setjmp_nofp:
return 12;
case ARM::SPACE:
- return MI->getOperand(1).getImm();
+ return MI.getOperand(1).getImm();
}
}
-unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
+unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
unsigned Size = 0;
- MachineBasicBlock::const_instr_iterator I = MI->getIterator();
- MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I = MI.getIterator();
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
- Size += GetInstSizeInBytes(&*I);
+ Size += GetInstSizeInBytes(*I);
}
return Size;
}
@@ -700,9 +718,9 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
}
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) const {
bool GPRDest = ARM::GPRRegClass.contains(DestReg);
bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
@@ -976,20 +994,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
}
}
-unsigned
-ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
+unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const {
+ switch (MI.getOpcode()) {
default: break;
case ARM::STRrs:
case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isReg() &&
- MI->getOperand(3).isImm() &&
- MI->getOperand(2).getReg() == 0 &&
- MI->getOperand(3).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
+ if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
+ MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
+ MI.getOperand(3).getImm() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
}
break;
case ARM::STRi12:
@@ -997,27 +1012,24 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case ARM::tSTRspi:
case ARM::VSTRD:
case ARM::VSTRS:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
+ if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
+ MI.getOperand(2).getImm() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
}
break;
case ARM::VST1q64:
case ARM::VST1d64TPseudo:
case ARM::VST1d64QPseudo:
- if (MI->getOperand(0).isFI() &&
- MI->getOperand(2).getSubReg() == 0) {
- FrameIndex = MI->getOperand(0).getIndex();
- return MI->getOperand(2).getReg();
+ if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
+ FrameIndex = MI.getOperand(0).getIndex();
+ return MI.getOperand(2).getReg();
}
break;
case ARM::VSTMQIA:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(0).getSubReg() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
+ if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
}
break;
}
@@ -1025,10 +1037,10 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
-unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ return MI.mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
void ARMBaseInstrInfo::
@@ -1164,20 +1176,17 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
}
}
-unsigned
-ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
+unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const {
+ switch (MI.getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isReg() &&
- MI->getOperand(3).isImm() &&
- MI->getOperand(2).getReg() == 0 &&
- MI->getOperand(3).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
+ if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
+ MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
+ MI.getOperand(3).getImm() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
}
break;
case ARM::LDRi12:
@@ -1185,27 +1194,24 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case ARM::tLDRspi:
case ARM::VLDRD:
case ARM::VLDRS:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
+ if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
+ MI.getOperand(2).getImm() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
}
break;
case ARM::VLD1q64:
case ARM::VLD1d64TPseudo:
case ARM::VLD1d64QPseudo:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(0).getSubReg() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
+ if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
}
break;
case ARM::VLDMQIA:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(0).getSubReg() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
+ if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
}
break;
}
@@ -1213,20 +1219,19 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return 0;
}
-unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
- int &FrameIndex) const {
+unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
+ int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ return MI.mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
/// \brief Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMID_UPD
/// depending on whether the result is used.
-void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MBBI) const {
+void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
bool isThumb1 = Subtarget.isThumb1Only();
bool isThumb2 = Subtarget.isThumb2();
const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
- MachineInstr *MI = MBBI;
DebugLoc dl = MI->getDebugLoc();
MachineBasicBlock *BB = MI->getParent();
@@ -1269,24 +1274,20 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MBBI) const {
STM.addReg(Reg, RegState::Kill);
}
- BB->erase(MBBI);
+ BB->erase(MI);
}
-bool
-ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
- MachineFunction &MF = *MI->getParent()->getParent();
- Reloc::Model RM = MF.getTarget().getRelocationModel();
-
- if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
+bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+ if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
"LOAD_STACK_GUARD currently supported only for MachO.");
- expandLoadStackGuard(MI, RM);
- MI->getParent()->erase(MI);
+ expandLoadStackGuard(MI);
+ MI.getParent()->erase(MI);
return true;
}
- if (MI->getOpcode() == ARM::MEMCPY) {
+ if (MI.getOpcode() == ARM::MEMCPY) {
expandMEMCPY(MI);
return true;
}
@@ -1295,14 +1296,13 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() ||
- Subtarget.isFPOnlySP())
+ if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
return false;
// Look for a copy between even S-registers. That is where we keep floats
// when using NEON v2f32 instructions for f32 arithmetic.
- unsigned DstRegS = MI->getOperand(0).getReg();
- unsigned SrcRegS = MI->getOperand(1).getReg();
+ unsigned DstRegS = MI.getOperand(0).getReg();
+ unsigned SrcRegS = MI.getOperand(1).getReg();
if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
return false;
@@ -1317,44 +1317,44 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
// We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
// legal if the COPY already defines the full DstRegD, and it isn't a
// sub-register insertion.
- if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
+ if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
return false;
// A dead copy shouldn't show up here, but reject it just in case.
- if (MI->getOperand(0).isDead())
+ if (MI.getOperand(0).isDead())
return false;
// All clear, widen the COPY.
- DEBUG(dbgs() << "widening: " << *MI);
- MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ DEBUG(dbgs() << "widening: " << MI);
+ MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
// Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
// or some other super-register.
- int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
+ int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
if (ImpDefIdx != -1)
- MI->RemoveOperand(ImpDefIdx);
+ MI.RemoveOperand(ImpDefIdx);
// Change the opcode and operands.
- MI->setDesc(get(ARM::VMOVD));
- MI->getOperand(0).setReg(DstRegD);
- MI->getOperand(1).setReg(SrcRegD);
+ MI.setDesc(get(ARM::VMOVD));
+ MI.getOperand(0).setReg(DstRegD);
+ MI.getOperand(1).setReg(SrcRegD);
AddDefaultPred(MIB);
// We are now reading SrcRegD instead of SrcRegS. This may upset the
// register scavenger and machine verifier, so we need to indicate that we
// are reading an undefined value from SrcRegD, but a proper value from
// SrcRegS.
- MI->getOperand(1).setIsUndef();
+ MI.getOperand(1).setIsUndef();
MIB.addReg(SrcRegS, RegState::Implicit);
// SrcRegD may actually contain an unrelated value in the ssub_1
// sub-register. Don't kill it. Only kill the ssub_0 sub-register.
- if (MI->getOperand(1).isKill()) {
- MI->getOperand(1).setIsKill(false);
- MI->addRegisterKilled(SrcRegS, TRI, true);
+ if (MI.getOperand(1).isKill()) {
+ MI.getOperand(1).setIsKill(false);
+ MI.addRegisterKilled(SrcRegS, TRI, true);
}
- DEBUG(dbgs() << "replaced by: " << *MI);
+ DEBUG(dbgs() << "replaced by: " << MI);
return true;
}
@@ -1403,54 +1403,54 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
return PCLabelId;
}
-void ARMBaseInstrInfo::
-reMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig,
- const TargetRegisterInfo &TRI) const {
- unsigned Opcode = Orig->getOpcode();
+void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr &Orig,
+ const TargetRegisterInfo &TRI) const {
+ unsigned Opcode = Orig.getOpcode();
switch (Opcode) {
default: {
- MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
+ MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
break;
}
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
MachineFunction &MF = *MBB.getParent();
- unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned CPI = Orig.getOperand(1).getIndex();
unsigned PCLabelId = duplicateCPV(MF, CPI);
- MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
- DestReg)
- .addConstantPoolIndex(CPI).addImm(PCLabelId);
- MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
+ .addConstantPoolIndex(CPI)
+ .addImm(PCLabelId);
+ MIB->setMemRefs(Orig.memoperands_begin(), Orig.memoperands_end());
break;
}
}
}
-MachineInstr *
-ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
+MachineInstr *ARMBaseInstrInfo::duplicate(MachineInstr &Orig,
+ MachineFunction &MF) const {
MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
- switch(Orig->getOpcode()) {
+ switch (Orig.getOpcode()) {
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
- unsigned CPI = Orig->getOperand(1).getIndex();
+ unsigned CPI = Orig.getOperand(1).getIndex();
unsigned PCLabelId = duplicateCPV(MF, CPI);
- Orig->getOperand(1).setIndex(CPI);
- Orig->getOperand(2).setImm(PCLabelId);
+ Orig.getOperand(1).setIndex(CPI);
+ Orig.getOperand(2).setImm(PCLabelId);
break;
}
}
return MI;
}
-bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1,
+bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
+ const MachineInstr &MI1,
const MachineRegisterInfo *MRI) const {
- unsigned Opcode = MI0->getOpcode();
+ unsigned Opcode = MI0.getOpcode();
if (Opcode == ARM::t2LDRpci ||
Opcode == ARM::t2LDRpci_pic ||
Opcode == ARM::tLDRpci ||
@@ -1461,13 +1461,13 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
Opcode == ARM::MOV_ga_pcrel ||
Opcode == ARM::MOV_ga_pcrel_ldr ||
Opcode == ARM::t2MOV_ga_pcrel) {
- if (MI1->getOpcode() != Opcode)
+ if (MI1.getOpcode() != Opcode)
return false;
- if (MI0->getNumOperands() != MI1->getNumOperands())
+ if (MI0.getNumOperands() != MI1.getNumOperands())
return false;
- const MachineOperand &MO0 = MI0->getOperand(1);
- const MachineOperand &MO1 = MI1->getOperand(1);
+ const MachineOperand &MO0 = MI0.getOperand(1);
+ const MachineOperand &MO1 = MI1.getOperand(1);
if (MO0.getOffset() != MO1.getOffset())
return false;
@@ -1480,7 +1480,7 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
// Ignore the PC labels.
return MO0.getGlobal() == MO1.getGlobal();
- const MachineFunction *MF = MI0->getParent()->getParent();
+ const MachineFunction *MF = MI0.getParent()->getParent();
const MachineConstantPool *MCP = MF->getConstantPool();
int CPI0 = MO0.getIndex();
int CPI1 = MO1.getIndex();
@@ -1499,13 +1499,13 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
}
return false;
} else if (Opcode == ARM::PICLDR) {
- if (MI1->getOpcode() != Opcode)
+ if (MI1.getOpcode() != Opcode)
return false;
- if (MI0->getNumOperands() != MI1->getNumOperands())
+ if (MI0.getNumOperands() != MI1.getNumOperands())
return false;
- unsigned Addr0 = MI0->getOperand(1).getReg();
- unsigned Addr1 = MI1->getOperand(1).getReg();
+ unsigned Addr0 = MI0.getOperand(1).getReg();
+ unsigned Addr1 = MI1.getOperand(1).getReg();
if (Addr0 != Addr1) {
if (!MRI ||
!TargetRegisterInfo::isVirtualRegister(Addr0) ||
@@ -1517,21 +1517,21 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
MachineInstr *Def1 = MRI->getVRegDef(Addr1);
// Check if the loaded value, e.g. a constantpool of a global address, are
// the same.
- if (!produceSameValue(Def0, Def1, MRI))
+ if (!produceSameValue(*Def0, *Def1, MRI))
return false;
}
- for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
+ for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
// %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
- const MachineOperand &MO0 = MI0->getOperand(i);
- const MachineOperand &MO1 = MI1->getOperand(i);
+ const MachineOperand &MO0 = MI0.getOperand(i);
+ const MachineOperand &MO1 = MI1.getOperand(i);
if (!MO0.isIdenticalTo(MO1))
return false;
}
return true;
}
- return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+ return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
@@ -1653,7 +1653,7 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
return true;
}
-bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const {
// Debug info is never a scheduling boundary. It's necessary to be explicit
@@ -1662,11 +1662,11 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
// considered a scheduling hazard, which is wrong. It should be the actual
// instruction preceding the dbg_value instruction(s), just like it is
// when debug info is not present.
- if (MI->isDebugValue())
+ if (MI.isDebugValue())
return false;
// Terminators and labels can't be scheduled around.
- if (MI->isTerminator() || MI->isPosition())
+ if (MI.isTerminator() || MI.isPosition())
return true;
// Treat the start of the IT block as a scheduling boundary, but schedule
@@ -1690,7 +1690,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
// Calls don't actually change the stack pointer, even if they have imp-defs.
// No ARM calling conventions change the stack pointer. (X86 calling
// conventions sometimes do).
- if (!MI->isCall() && MI->definesRegister(ARM::SP))
+ if (!MI.isCall() && MI.definesRegister(ARM::SP))
return true;
return false;
@@ -1718,7 +1718,7 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
CmpMI->getOpcode() == ARM::t2CMPri) {
unsigned Reg = CmpMI->getOperand(0).getReg();
unsigned PredReg = 0;
- ARMCC::CondCodes P = getInstrPredicate(CmpMI, PredReg);
+ ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
isARMLowRegister(Reg))
return false;
@@ -1765,24 +1765,24 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
bool
ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB) const {
- // Reduce false anti-dependencies to let Swift's out-of-order execution
+ // Reduce false anti-dependencies to let the target's out-of-order execution
// engine do its thing.
- return Subtarget.isSwift();
+ return Subtarget.isProfitableToUnpredicate();
}
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
-ARMCC::CondCodes
-llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
- int PIdx = MI->findFirstPredOperandIdx();
+ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
+ unsigned &PredReg) {
+ int PIdx = MI.findFirstPredOperandIdx();
if (PIdx == -1) {
PredReg = 0;
return ARMCC::AL;
}
- PredReg = MI->getOperand(PIdx+1).getReg();
- return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
+ PredReg = MI.getOperand(PIdx+1).getReg();
+ return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
}
@@ -1797,11 +1797,11 @@ unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
llvm_unreachable("Unknown unconditional branch opcode!");
}
-MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
unsigned OpIdx1,
unsigned OpIdx2) const {
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
case ARM::MOVCCr:
case ARM::t2MOVCCr: {
// MOVCC can be commuted by inverting the condition.
@@ -1810,13 +1810,14 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr *MI,
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
return nullptr;
- MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
- if (!MI)
+ MachineInstr *CommutedMI =
+ TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+ if (!CommutedMI)
return nullptr;
// After swapping the MOVCC operands, also invert the condition.
- MI->getOperand(MI->findFirstPredOperandIdx())
- .setImm(ARMCC::getOppositeCondition(CC));
- return MI;
+ CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
+ .setImm(ARMCC::getOppositeCondition(CC));
+ return CommutedMI;
}
}
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
@@ -1860,11 +1861,11 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
return MI;
}
-bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
+bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
SmallVectorImpl<MachineOperand> &Cond,
unsigned &TrueOp, unsigned &FalseOp,
bool &Optimizable) const {
- assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+ assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
// MOVCC operands:
// 0: Def.
@@ -1874,38 +1875,38 @@ bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
// 4: CPSR use.
TrueOp = 1;
FalseOp = 2;
- Cond.push_back(MI->getOperand(3));
- Cond.push_back(MI->getOperand(4));
+ Cond.push_back(MI.getOperand(3));
+ Cond.push_back(MI.getOperand(4));
// We can always fold a def.
Optimizable = true;
return false;
}
MachineInstr *
-ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
+ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
SmallPtrSetImpl<MachineInstr *> &SeenMIs,
bool PreferFalse) const {
- assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+ assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
+ MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ MachineInstr *DefMI = canFoldIntoMOVCC(MI.getOperand(2).getReg(), MRI, this);
bool Invert = !DefMI;
if (!DefMI)
- DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
+ DefMI = canFoldIntoMOVCC(MI.getOperand(1).getReg(), MRI, this);
if (!DefMI)
return nullptr;
// Find new register class to use.
- MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
- unsigned DestReg = MI->getOperand(0).getReg();
+ MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
+ unsigned DestReg = MI.getOperand(0).getReg();
const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
if (!MRI.constrainRegClass(DestReg, PreviousClass))
return nullptr;
// Create a new predicated version of DefMI.
// Rfalse is the first use.
- MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- DefMI->getDesc(), DestReg);
+ MachineInstrBuilder NewMI =
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);
// Copy all the DefMI operands, excluding its (null) predicate.
const MCInstrDesc &DefDesc = DefMI->getDesc();
@@ -1913,12 +1914,12 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
NewMI.addOperand(DefMI->getOperand(i));
- unsigned CondCode = MI->getOperand(3).getImm();
+ unsigned CondCode = MI.getOperand(3).getImm();
if (Invert)
NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
else
NewMI.addImm(CondCode);
- NewMI.addOperand(MI->getOperand(4));
+ NewMI.addOperand(MI.getOperand(4));
// DefMI is not the -S version that sets CPSR, so add an optional %noreg.
if (NewMI->hasOptionalDef())
@@ -1940,7 +1941,7 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
// DefMI would be invalid when transferred inside the loop. Checking for a
// loop is expensive, but at least remove kill flags if they are in different
// BBs.
- if (DefMI->getParent() != MI->getParent())
+ if (DefMI->getParent() != MI.getParent())
NewMI->clearKillInfo();
// The caller will erase MI, but not DefMI.
@@ -1994,10 +1995,12 @@ unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
}
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI, DebugLoc dl,
- unsigned DestReg, unsigned BaseReg, int NumBytes,
- ARMCC::CondCodes Pred, unsigned PredReg,
- const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ MachineBasicBlock::iterator &MBBI,
+ const DebugLoc &dl, unsigned DestReg,
+ unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII,
+ unsigned MIFlags) {
if (NumBytes == 0 && DestReg != BaseReg) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
.addReg(BaseReg, RegState::Kill)
@@ -2281,30 +2284,30 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
-bool ARMBaseInstrInfo::
-analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
- int &CmpMask, int &CmpValue) const {
- switch (MI->getOpcode()) {
+bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &CmpMask,
+ int &CmpValue) const {
+ switch (MI.getOpcode()) {
default: break;
case ARM::CMPri:
case ARM::t2CMPri:
- SrcReg = MI->getOperand(0).getReg();
+ SrcReg = MI.getOperand(0).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- CmpValue = MI->getOperand(1).getImm();
+ CmpValue = MI.getOperand(1).getImm();
return true;
case ARM::CMPrr:
case ARM::t2CMPrr:
- SrcReg = MI->getOperand(0).getReg();
- SrcReg2 = MI->getOperand(1).getReg();
+ SrcReg = MI.getOperand(0).getReg();
+ SrcReg2 = MI.getOperand(1).getReg();
CmpMask = ~0;
CmpValue = 0;
return true;
case ARM::TSTri:
case ARM::t2TSTri:
- SrcReg = MI->getOperand(0).getReg();
+ SrcReg = MI.getOperand(0).getReg();
SrcReg2 = 0;
- CmpMask = MI->getOperand(1).getImm();
+ CmpMask = MI.getOperand(1).getImm();
CmpValue = 0;
return true;
}
@@ -2385,25 +2388,25 @@ inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
/// condition code of instructions which use the flags.
-bool ARMBaseInstrInfo::
-optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
- int CmpMask, int CmpValue,
- const MachineRegisterInfo *MRI) const {
+bool ARMBaseInstrInfo::optimizeCompareInstr(
+ MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
+ int CmpValue, const MachineRegisterInfo *MRI) const {
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
if (!MI) return false;
// Masked compares sometimes use the same register as the corresponding 'and'.
if (CmpMask != ~0) {
- if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
+ if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
MI = nullptr;
for (MachineRegisterInfo::use_instr_iterator
UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
UI != UE; ++UI) {
- if (UI->getParent() != CmpInstr->getParent()) continue;
+ if (UI->getParent() != CmpInstr.getParent())
+ continue;
MachineInstr *PotentialAND = &*UI;
if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
- isPredicated(PotentialAND))
+ isPredicated(*PotentialAND))
continue;
MI = PotentialAND;
break;
@@ -2414,7 +2417,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// Get ready to iterate backward from CmpInstr.
MachineBasicBlock::iterator I = CmpInstr, E = MI,
- B = CmpInstr->getParent()->begin();
+ B = CmpInstr.getParent()->begin();
// Early exit if CmpInstr is at the beginning of the BB.
if (I == B) return false;
@@ -2427,13 +2430,13 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
if (SrcReg2 != 0)
// MI is not a candidate for CMPrr.
MI = nullptr;
- else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
+ else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
// Conservatively refuse to convert an instruction which isn't in the same
// BB as the comparison.
// For CMPri w/ CmpValue != 0, a Sub may still be a candidate.
// Thus we cannot return here.
- if (CmpInstr->getOpcode() == ARM::CMPri ||
- CmpInstr->getOpcode() == ARM::t2CMPri)
+ if (CmpInstr.getOpcode() == ARM::CMPri ||
+ CmpInstr.getOpcode() == ARM::t2CMPri)
MI = nullptr;
else
return false;
@@ -2453,7 +2456,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
return false;
// Check whether CmpInstr can be made redundant by the current instruction.
- if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
+ if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
Sub = &*I;
break;
}
@@ -2471,7 +2474,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
if (!MI) MI = Sub;
// We can't use a predicated instruction - it doesn't always write the flags.
- if (isPredicated(MI))
+ if (isPredicated(*MI))
return false;
switch (MI->getOpcode()) {
@@ -2519,7 +2522,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
OperandsToUpdate;
bool isSafe = false;
I = CmpInstr;
- E = CmpInstr->getParent()->end();
+ E = CmpInstr.getParent()->end();
while (!isSafe && ++I != E) {
const MachineInstr &Instr = *I;
for (unsigned IO = 0, EO = Instr.getNumOperands();
@@ -2608,7 +2611,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// If CPSR is not killed nor re-defined, we should check whether it is
// live-out. If it is live-out, do not optimize.
if (!isSafe) {
- MachineBasicBlock *MBB = CmpInstr->getParent();
+ MachineBasicBlock *MBB = CmpInstr.getParent();
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
SE = MBB->succ_end(); SI != SE; ++SI)
if ((*SI)->isLiveIn(ARM::CPSR))
@@ -2618,8 +2621,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// Toggle the optional operand to CPSR.
MI->getOperand(5).setReg(ARM::CPSR);
MI->getOperand(5).setIsDef(true);
- assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
- CmpInstr->eraseFromParent();
+ assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
+ CmpInstr.eraseFromParent();
// Modify the condition code of operands in OperandsToUpdate.
// Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
@@ -2633,42 +2636,42 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
return false;
}
-bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
- MachineInstr *DefMI, unsigned Reg,
+bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ unsigned Reg,
MachineRegisterInfo *MRI) const {
// Fold large immediates into add, sub, or, xor.
- unsigned DefOpc = DefMI->getOpcode();
+ unsigned DefOpc = DefMI.getOpcode();
if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
return false;
- if (!DefMI->getOperand(1).isImm())
+ if (!DefMI.getOperand(1).isImm())
// Could be t2MOVi32imm <ga:xx>
return false;
if (!MRI->hasOneNonDBGUse(Reg))
return false;
- const MCInstrDesc &DefMCID = DefMI->getDesc();
+ const MCInstrDesc &DefMCID = DefMI.getDesc();
if (DefMCID.hasOptionalDef()) {
unsigned NumOps = DefMCID.getNumOperands();
- const MachineOperand &MO = DefMI->getOperand(NumOps-1);
+ const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
if (MO.getReg() == ARM::CPSR && !MO.isDead())
// If DefMI defines CPSR and it is not dead, it's obviously not safe
// to delete DefMI.
return false;
}
- const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const MCInstrDesc &UseMCID = UseMI.getDesc();
if (UseMCID.hasOptionalDef()) {
unsigned NumOps = UseMCID.getNumOperands();
- if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
+ if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
// If the instruction sets the flag, do not attempt this optimization
// since it may change the semantics of the code.
return false;
}
- unsigned UseOpc = UseMI->getOpcode();
+ unsigned UseOpc = UseMI.getOpcode();
unsigned NewUseOpc = 0;
- uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
+ uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
bool Commute = false;
switch (UseOpc) {
@@ -2681,17 +2684,27 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
case ARM::t2ADDrr:
case ARM::t2ORRrr:
case ARM::t2EORrr: {
- Commute = UseMI->getOperand(2).getReg() != Reg;
+ Commute = UseMI.getOperand(2).getReg() != Reg;
switch (UseOpc) {
default: break;
+ case ARM::ADDrr:
case ARM::SUBrr: {
- if (Commute)
+ if (UseOpc == ARM::SUBrr && Commute)
+ return false;
+
+ // ADD/SUB are special because they're essentially the same operation, so
+ // we can handle a larger range of immediates.
+ if (ARM_AM::isSOImmTwoPartVal(ImmVal))
+ NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
+ else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
+ ImmVal = -ImmVal;
+ NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
+ } else
return false;
- ImmVal = -ImmVal;
- NewUseOpc = ARM::SUBri;
- // Fallthrough
+ SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ break;
}
- case ARM::ADDrr:
case ARM::ORRrr:
case ARM::EORrr: {
if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
@@ -2700,20 +2713,29 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
switch (UseOpc) {
default: break;
- case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
case ARM::EORrr: NewUseOpc = ARM::EORri; break;
}
break;
}
+ case ARM::t2ADDrr:
case ARM::t2SUBrr: {
- if (Commute)
+ if (UseOpc == ARM::t2SUBrr && Commute)
return false;
- ImmVal = -ImmVal;
- NewUseOpc = ARM::t2SUBri;
- // Fallthrough
+
+ // ADD/SUB are special because they're essentially the same operation, so
+ // we can handle a larger range of immediates.
+ if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
+ NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri;
+ else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
+ ImmVal = -ImmVal;
+ NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri;
+ } else
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
+ break;
}
- case ARM::t2ADDrr:
case ARM::t2ORRrr:
case ARM::t2EORrr: {
if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
@@ -2722,7 +2744,6 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
switch (UseOpc) {
default: break;
- case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
}
@@ -2733,27 +2754,27 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
}
unsigned OpIdx = Commute ? 2 : 1;
- unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
- bool isKill = UseMI->getOperand(OpIdx).isKill();
+ unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
+ bool isKill = UseMI.getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
- AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
- UseMI, UseMI->getDebugLoc(),
- get(NewUseOpc), NewReg)
- .addReg(Reg1, getKillRegState(isKill))
- .addImm(SOImmValV1)));
- UseMI->setDesc(get(NewUseOpc));
- UseMI->getOperand(1).setReg(NewReg);
- UseMI->getOperand(1).setIsKill();
- UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
- DefMI->eraseFromParent();
+ AddDefaultCC(
+ AddDefaultPred(BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
+ get(NewUseOpc), NewReg)
+ .addReg(Reg1, getKillRegState(isKill))
+ .addImm(SOImmValV1)));
+ UseMI.setDesc(get(NewUseOpc));
+ UseMI.getOperand(1).setReg(NewReg);
+ UseMI.getOperand(1).setIsKill();
+ UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
+ DefMI.eraseFromParent();
return true;
}
static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
- const MachineInstr *MI) {
- switch (MI->getOpcode()) {
+ const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default: {
- const MCInstrDesc &Desc = MI->getDesc();
+ const MCInstrDesc &Desc = MI.getDesc();
int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
assert(UOps >= 0 && "bad # UOps");
return UOps;
@@ -2763,7 +2784,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
case ARM::LDRBrs:
case ARM::STRrs:
case ARM::STRBrs: {
- unsigned ShOpVal = MI->getOperand(3).getImm();
+ unsigned ShOpVal = MI.getOperand(3).getImm();
bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (!isSub &&
@@ -2776,10 +2797,10 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
case ARM::LDRH:
case ARM::STRH: {
- if (!MI->getOperand(2).getReg())
+ if (!MI.getOperand(2).getReg())
return 1;
- unsigned ShOpVal = MI->getOperand(3).getImm();
+ unsigned ShOpVal = MI.getOperand(3).getImm();
bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (!isSub &&
@@ -2792,22 +2813,22 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
case ARM::LDRSB:
case ARM::LDRSH:
- return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
+ return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;
case ARM::LDRSB_POST:
case ARM::LDRSH_POST: {
- unsigned Rt = MI->getOperand(0).getReg();
- unsigned Rm = MI->getOperand(3).getReg();
+ unsigned Rt = MI.getOperand(0).getReg();
+ unsigned Rm = MI.getOperand(3).getReg();
return (Rt == Rm) ? 4 : 3;
}
case ARM::LDR_PRE_REG:
case ARM::LDRB_PRE_REG: {
- unsigned Rt = MI->getOperand(0).getReg();
- unsigned Rm = MI->getOperand(3).getReg();
+ unsigned Rt = MI.getOperand(0).getReg();
+ unsigned Rm = MI.getOperand(3).getReg();
if (Rt == Rm)
return 3;
- unsigned ShOpVal = MI->getOperand(4).getImm();
+ unsigned ShOpVal = MI.getOperand(4).getImm();
bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (!isSub &&
@@ -2820,7 +2841,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
case ARM::STR_PRE_REG:
case ARM::STRB_PRE_REG: {
- unsigned ShOpVal = MI->getOperand(4).getImm();
+ unsigned ShOpVal = MI.getOperand(4).getImm();
bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (!isSub &&
@@ -2833,21 +2854,20 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
case ARM::LDRH_PRE:
case ARM::STRH_PRE: {
- unsigned Rt = MI->getOperand(0).getReg();
- unsigned Rm = MI->getOperand(3).getReg();
+ unsigned Rt = MI.getOperand(0).getReg();
+ unsigned Rm = MI.getOperand(3).getReg();
if (!Rm)
return 2;
if (Rt == Rm)
return 3;
- return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
- ? 3 : 2;
+ return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;
}
case ARM::LDR_POST_REG:
case ARM::LDRB_POST_REG:
case ARM::LDRH_POST: {
- unsigned Rt = MI->getOperand(0).getReg();
- unsigned Rm = MI->getOperand(3).getReg();
+ unsigned Rt = MI.getOperand(0).getReg();
+ unsigned Rm = MI.getOperand(3).getReg();
return (Rt == Rm) ? 3 : 2;
}
@@ -2866,13 +2886,13 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
case ARM::LDRSB_PRE:
case ARM::LDRSH_PRE: {
- unsigned Rm = MI->getOperand(3).getReg();
+ unsigned Rm = MI.getOperand(3).getReg();
if (Rm == 0)
return 3;
- unsigned Rt = MI->getOperand(0).getReg();
+ unsigned Rt = MI.getOperand(0).getReg();
if (Rt == Rm)
return 4;
- unsigned ShOpVal = MI->getOperand(4).getImm();
+ unsigned ShOpVal = MI.getOperand(4).getImm();
bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (!isSub &&
@@ -2884,18 +2904,20 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
}
case ARM::LDRD: {
- unsigned Rt = MI->getOperand(0).getReg();
- unsigned Rn = MI->getOperand(2).getReg();
- unsigned Rm = MI->getOperand(3).getReg();
+ unsigned Rt = MI.getOperand(0).getReg();
+ unsigned Rn = MI.getOperand(2).getReg();
+ unsigned Rm = MI.getOperand(3).getReg();
if (Rm)
- return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+ return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
+ : 3;
return (Rt == Rn) ? 3 : 2;
}
case ARM::STRD: {
- unsigned Rm = MI->getOperand(3).getReg();
+ unsigned Rm = MI.getOperand(3).getReg();
if (Rm)
- return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
+ return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4
+ : 3;
return 2;
}
@@ -2908,24 +2930,26 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
return 4;
case ARM::LDRD_PRE: {
- unsigned Rt = MI->getOperand(0).getReg();
- unsigned Rn = MI->getOperand(3).getReg();
- unsigned Rm = MI->getOperand(4).getReg();
+ unsigned Rt = MI.getOperand(0).getReg();
+ unsigned Rn = MI.getOperand(3).getReg();
+ unsigned Rm = MI.getOperand(4).getReg();
if (Rm)
- return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+ return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
+ : 4;
return (Rt == Rn) ? 4 : 3;
}
case ARM::t2LDRD_PRE: {
- unsigned Rt = MI->getOperand(0).getReg();
- unsigned Rn = MI->getOperand(3).getReg();
+ unsigned Rt = MI.getOperand(0).getReg();
+ unsigned Rn = MI.getOperand(3).getReg();
return (Rt == Rn) ? 4 : 3;
}
case ARM::STRD_PRE: {
- unsigned Rm = MI->getOperand(4).getReg();
+ unsigned Rm = MI.getOperand(4).getReg();
if (Rm)
- return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
+ return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5
+ : 4;
return 3;
}
@@ -2953,8 +2977,8 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
return 2;
case ARM::t2LDRDi8: {
- unsigned Rt = MI->getOperand(0).getReg();
- unsigned Rn = MI->getOperand(2).getReg();
+ unsigned Rt = MI.getOperand(0).getReg();
+ unsigned Rn = MI.getOperand(2).getReg();
return (Rt == Rn) ? 3 : 2;
}
@@ -2994,22 +3018,61 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
// sizes during MC lowering. That target hook should be local to MC lowering
// because we can't ensure that it is aware of other MI forms. Doing this will
// ensure that MachineMemOperands are correctly propagated through all passes.
-unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
+unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
unsigned Size = 0;
- for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
- E = MI->memoperands_end(); I != E; ++I) {
+ for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
+ E = MI.memoperands_end();
+ I != E; ++I) {
Size += (*I)->getSize();
}
return Size / 4;
}
-unsigned
-ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
- const MachineInstr *MI) const {
+static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
+ unsigned NumRegs) {
+ unsigned UOps = 1 + NumRegs; // 1 for address computation.
+ switch (Opc) {
+ default:
+ break;
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ ++UOps; // One for base register writeback.
+ break;
+ case ARM::LDMIA_RET:
+ case ARM::tPOP_RET:
+ case ARM::t2LDMIA_RET:
+ UOps += 2; // One for base reg wb, one for write to pc.
+ break;
+ }
+ return UOps;
+}
+
+unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr &MI) const {
if (!ItinData || ItinData->isEmpty())
return 1;
- const MCInstrDesc &Desc = MI->getDesc();
+ const MCInstrDesc &Desc = MI.getDesc();
unsigned Class = Desc.getSchedClass();
int ItinUOps = ItinData->getNumMicroOps(Class);
if (ItinUOps >= 0) {
@@ -3019,7 +3082,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
return ItinUOps;
}
- unsigned Opc = MI->getOpcode();
+ unsigned Opc = MI.getOpcode();
switch (Opc) {
default:
llvm_unreachable("Unexpected multi-uops instruction!");
@@ -3049,7 +3112,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
case ARM::VSTMSIA:
case ARM::VSTMSIA_UPD:
case ARM::VSTMSDB_UPD: {
- unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
+ unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();
return (NumRegs / 2) + (NumRegs % 2) + 1;
}
@@ -3085,66 +3148,36 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
case ARM::t2STMDB:
case ARM::t2STMIA_UPD:
case ARM::t2STMDB_UPD: {
- unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
- if (Subtarget.isSwift()) {
- int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
- switch (Opc) {
- default: break;
- case ARM::VLDMDIA_UPD:
- case ARM::VLDMDDB_UPD:
- case ARM::VLDMSIA_UPD:
- case ARM::VLDMSDB_UPD:
- case ARM::VSTMDIA_UPD:
- case ARM::VSTMDDB_UPD:
- case ARM::VSTMSIA_UPD:
- case ARM::VSTMSDB_UPD:
- case ARM::LDMIA_UPD:
- case ARM::LDMDA_UPD:
- case ARM::LDMDB_UPD:
- case ARM::LDMIB_UPD:
- case ARM::STMIA_UPD:
- case ARM::STMDA_UPD:
- case ARM::STMDB_UPD:
- case ARM::STMIB_UPD:
- case ARM::tLDMIA_UPD:
- case ARM::tSTMIA_UPD:
- case ARM::t2LDMIA_UPD:
- case ARM::t2LDMDB_UPD:
- case ARM::t2STMIA_UPD:
- case ARM::t2STMDB_UPD:
- ++UOps; // One for base register writeback.
- break;
- case ARM::LDMIA_RET:
- case ARM::tPOP_RET:
- case ARM::t2LDMIA_RET:
- UOps += 2; // One for base reg wb, one for write to pc.
- break;
- }
- return UOps;
- } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
+ unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;
+ switch (Subtarget.getLdStMultipleTiming()) {
+ case ARMSubtarget::SingleIssuePlusExtras:
+ return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
+ case ARMSubtarget::SingleIssue:
+ // Assume the worst.
+ return NumRegs;
+ case ARMSubtarget::DoubleIssue: {
if (NumRegs < 4)
return 2;
// 4 registers would be issued: 2, 2.
// 5 registers would be issued: 2, 2, 1.
- int A8UOps = (NumRegs / 2);
+ unsigned UOps = (NumRegs / 2);
if (NumRegs % 2)
- ++A8UOps;
- return A8UOps;
- } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
- int A9UOps = (NumRegs / 2);
+ ++UOps;
+ return UOps;
+ }
+ case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
+ unsigned UOps = (NumRegs / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
- if ((NumRegs % 2) ||
- !MI->hasOneMemOperand() ||
- (*MI->memoperands_begin())->getAlignment() < 8)
- ++A9UOps;
- return A9UOps;
- } else {
- // Assume the worst.
- return NumRegs;
+ if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
+ (*MI.memoperands_begin())->getAlignment() < 8)
+ ++UOps;
+ return UOps;
+ }
}
}
}
+ llvm_unreachable("Didn't find the number of microops");
}
int
@@ -3428,13 +3461,13 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
}
static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
- const MachineInstr *MI, unsigned Reg,
+ const MachineInstr &MI, unsigned Reg,
unsigned &UseIdx, unsigned &Dist) {
Dist = 0;
- MachineBasicBlock::const_instr_iterator II = ++MI->getIterator();
+ MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();
assert(II->isInsideBundle() && "Empty bundle?");
- MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
// FIXME: This doesn't properly handle multiple uses.
int Idx = -1;
@@ -3460,17 +3493,17 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
/// itinerary based on the def opcode and alignment. The caller will ensure that
/// adjusted latency is at least one cycle.
static int adjustDefLatency(const ARMSubtarget &Subtarget,
- const MachineInstr *DefMI,
- const MCInstrDesc *DefMCID, unsigned DefAlign) {
+ const MachineInstr &DefMI,
+ const MCInstrDesc &DefMCID, unsigned DefAlign) {
int Adjust = 0;
if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
- switch (DefMCID->getOpcode()) {
+ switch (DefMCID.getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
- unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ unsigned ShOpVal = DefMI.getOperand(3).getImm();
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (ShImm == 0 ||
(ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
@@ -3482,7 +3515,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
case ARM::t2LDRHs:
case ARM::t2LDRSHs: {
// Thumb2 mode: lsl only.
- unsigned ShAmt = DefMI->getOperand(3).getImm();
+ unsigned ShAmt = DefMI.getOperand(3).getImm();
if (ShAmt == 0 || ShAmt == 2)
--Adjust;
break;
@@ -3491,11 +3524,11 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
} else if (Subtarget.isSwift()) {
// FIXME: Properly handle all of the latency adjustments for address
// writeback.
- switch (DefMCID->getOpcode()) {
+ switch (DefMCID.getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
- unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ unsigned ShOpVal = DefMI.getOperand(3).getImm();
bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
if (!isSub &&
@@ -3513,7 +3546,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
case ARM::t2LDRHs:
case ARM::t2LDRSHs: {
// Thumb2 mode: lsl only.
- unsigned ShAmt = DefMI->getOperand(3).getImm();
+ unsigned ShAmt = DefMI.getOperand(3).getImm();
if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
Adjust -= 2;
break;
@@ -3521,8 +3554,8 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
}
}
- if (DefAlign < 8 && Subtarget.isLikeA9()) {
- switch (DefMCID->getOpcode()) {
+ if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
+ switch (DefMCID.getOpcode()) {
default: break;
case ARM::VLD1q8:
case ARM::VLD1q16:
@@ -3637,53 +3670,55 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
return Adjust;
}
-
-
-int
-ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI,
- unsigned UseIdx) const {
+int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &DefMI,
+ unsigned DefIdx,
+ const MachineInstr &UseMI,
+ unsigned UseIdx) const {
// No operand latency. The caller may fall back to getInstrLatency.
if (!ItinData || ItinData->isEmpty())
return -1;
- const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
unsigned Reg = DefMO.getReg();
- const MCInstrDesc *DefMCID = &DefMI->getDesc();
- const MCInstrDesc *UseMCID = &UseMI->getDesc();
+ const MachineInstr *ResolvedDefMI = &DefMI;
unsigned DefAdj = 0;
- if (DefMI->isBundle()) {
- DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
- DefMCID = &DefMI->getDesc();
- }
- if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
- DefMI->isRegSequence() || DefMI->isImplicitDef()) {
+ if (DefMI.isBundle())
+ ResolvedDefMI =
+ getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
+ if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||
+ ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {
return 1;
}
+ const MachineInstr *ResolvedUseMI = &UseMI;
unsigned UseAdj = 0;
- if (UseMI->isBundle()) {
- unsigned NewUseIdx;
- const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
- Reg, NewUseIdx, UseAdj);
- if (!NewUseMI)
+ if (UseMI.isBundle()) {
+ ResolvedUseMI =
+ getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
+ if (!ResolvedUseMI)
return -1;
-
- UseMI = NewUseMI;
- UseIdx = NewUseIdx;
- UseMCID = &UseMI->getDesc();
}
+ return getOperandLatencyImpl(
+ ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
+ Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
+}
+
+int ARMBaseInstrInfo::getOperandLatencyImpl(
+ const InstrItineraryData *ItinData, const MachineInstr &DefMI,
+ unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,
+ const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,
+ unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {
if (Reg == ARM::CPSR) {
- if (DefMI->getOpcode() == ARM::FMSTAT) {
+ if (DefMI.getOpcode() == ARM::FMSTAT) {
// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
return Subtarget.isLikeA9() ? 1 : 20;
}
// CPSR set and branch can be paired in the same cycle.
- if (UseMI->isBranch())
+ if (UseMI.isBranch())
return 0;
// Otherwise it takes the instruction latency (generally one).
@@ -3694,7 +3729,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// incur a code size penalty (not able to use the CPSR setting 16-bit
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
- const MachineFunction *MF = DefMI->getParent()->getParent();
+ const MachineFunction *MF = DefMI.getParent()->getParent();
// FIXME: Use Function::optForSize().
if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
--Latency;
@@ -3702,17 +3737,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
- if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
+ if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
return -1;
- unsigned DefAlign = DefMI->hasOneMemOperand()
- ? (*DefMI->memoperands_begin())->getAlignment() : 0;
- unsigned UseAlign = UseMI->hasOneMemOperand()
- ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+ unsigned DefAlign = DefMI.hasOneMemOperand()
+ ? (*DefMI.memoperands_begin())->getAlignment()
+ : 0;
+ unsigned UseAlign = UseMI.hasOneMemOperand()
+ ? (*UseMI.memoperands_begin())->getAlignment()
+ : 0;
// Get the itinerary's latency if possible, and handle variable_ops.
- int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
- *UseMCID, UseIdx, UseAlign);
+ int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, UseMCID,
+ UseIdx, UseAlign);
// Unable to find operand latency. The caller may resort to getInstrLatency.
if (Latency < 0)
return Latency;
@@ -3746,10 +3783,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (!UseNode->isMachineOpcode()) {
int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
- if (Subtarget.isLikeA9() || Subtarget.isSwift())
- return Latency <= 2 ? 1 : Latency - 1;
- else
- return Latency <= 3 ? 1 : Latency - 2;
+ int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
+ int Threshold = 1 + Adj;
+ return Latency <= Threshold ? 1 : Latency - Adj;
}
const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
@@ -3820,7 +3856,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
}
- if (DefAlign < 8 && Subtarget.isLikeA9())
+ if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
switch (DefMCID.getOpcode()) {
default: break;
case ARM::VLD1q8:
@@ -3946,15 +3982,15 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
-unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const {
- if (MI->isCopyLike() || MI->isInsertSubreg() ||
- MI->isRegSequence() || MI->isImplicitDef())
+unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
+ if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
+ MI.isImplicitDef())
return 0;
- if (MI->isBundle())
+ if (MI.isBundle())
return 0;
- const MCInstrDesc &MCID = MI->getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
// When predicated, CPSR is an additional source operand for CPSR updating
@@ -3965,26 +4001,26 @@ unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const {
}
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
+ const MachineInstr &MI,
unsigned *PredCost) const {
- if (MI->isCopyLike() || MI->isInsertSubreg() ||
- MI->isRegSequence() || MI->isImplicitDef())
+ if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
+ MI.isImplicitDef())
return 1;
// An instruction scheduler typically runs on unbundled instructions, however
// other passes may query the latency of a bundled instruction.
- if (MI->isBundle()) {
+ if (MI.isBundle()) {
unsigned Latency = 0;
- MachineBasicBlock::const_instr_iterator I = MI->getIterator();
- MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I = MI.getIterator();
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
if (I->getOpcode() != ARM::t2IT)
- Latency += getInstrLatency(ItinData, &*I, PredCost);
+ Latency += getInstrLatency(ItinData, *I, PredCost);
}
return Latency;
}
- const MCInstrDesc &MCID = MI->getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
@@ -3993,7 +4029,7 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
// Be sure to call getStageLatency for an empty itinerary in case it has a
// valid MinLatency property.
if (!ItinData)
- return MI->mayLoad() ? 3 : 1;
+ return MI.mayLoad() ? 3 : 1;
unsigned Class = MCID.getSchedClass();
@@ -4005,9 +4041,9 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
unsigned Latency = ItinData->getStageLatency(Class);
// Adjust for dynamic def-side opcode variants not captured by the itinerary.
- unsigned DefAlign = MI->hasOneMemOperand()
- ? (*MI->memoperands_begin())->getAlignment() : 0;
- int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
+ unsigned DefAlign =
+ MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
+ int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
if (Adj >= 0 || (int)Latency > -Adj) {
return Latency + Adj;
}
@@ -4032,46 +4068,46 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
}
}
-bool ARMBaseInstrInfo::
-hasHighOperandLatency(const TargetSchedModel &SchedModel,
- const MachineRegisterInfo *MRI,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
- unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
- if (Subtarget.isCortexA8() &&
+bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr &DefMI,
+ unsigned DefIdx,
+ const MachineInstr &UseMI,
+ unsigned UseIdx) const {
+ unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
+ unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;
+ if (Subtarget.nonpipelinedVFP() &&
(DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
- // CortexA8 VFP instructions are not pipelined.
return true;
// Hoist VFP / NEON instructions with 4 or higher latency.
- unsigned Latency
- = SchedModel.computeOperandLatency(DefMI, DefIdx, UseMI, UseIdx);
+ unsigned Latency =
+ SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
if (Latency <= 3)
return false;
return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
}
-bool ARMBaseInstrInfo::
-hasLowDefLatency(const TargetSchedModel &SchedModel,
- const MachineInstr *DefMI, unsigned DefIdx) const {
+bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
+ const MachineInstr &DefMI,
+ unsigned DefIdx) const {
const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
if (!ItinData || ItinData->isEmpty())
return false;
- unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;
if (DDomain == ARMII::DomainGeneral) {
- unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned DefClass = DefMI.getDesc().getSchedClass();
int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
return (DefCycle != -1 && DefCycle <= 2);
}
return false;
}
-bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
+bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const {
- if (convertAddSubFlagsOpcode(MI->getOpcode())) {
+ if (convertAddSubFlagsOpcode(MI.getOpcode())) {
ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
return false;
}
@@ -4082,8 +4118,7 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
// sequence is needed for other targets.
void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
unsigned LoadImmOpc,
- unsigned LoadOpc,
- Reloc::Model RM) const {
+ unsigned LoadOpc) const {
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
unsigned Reg = MI->getOperand(0).getReg();
@@ -4094,12 +4129,12 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
.addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
- if (Subtarget.GVIsIndirectSymbol(GV, RM)) {
+ if (Subtarget.isGVIndirectSymbol(GV)) {
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
MIB.addReg(Reg, RegState::Kill).addImm(0);
- unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
- MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
MIB.addMemOperand(MMO);
AddDefaultPred(MIB);
}
@@ -4146,24 +4181,24 @@ enum ARMExeDomain {
// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
//
std::pair<uint16_t, uint16_t>
-ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
+ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {
// If we don't have access to NEON instructions then we won't be able
// to swizzle anything to the NEON domain. Check to make sure.
if (Subtarget.hasNEON()) {
// VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
// if they are not predicated.
- if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+ if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
// CortexA9 is particularly picky about mixing the two and wants these
// converted.
- if (Subtarget.isCortexA9() && !isPredicated(MI) &&
- (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR ||
- MI->getOpcode() == ARM::VMOVS))
+ if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&
+ (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||
+ MI.getOpcode() == ARM::VMOVS))
return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
}
// No other instructions can be swizzled, so just determine their domain.
- unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
+ unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;
if (Domain & ARMII::DomainNEON)
return std::make_pair(ExeNEON, 0);
@@ -4210,12 +4245,11 @@ static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
/// (including the case where the DPR itself is defined), it should not.
///
static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
- MachineInstr *MI,
- unsigned DReg, unsigned Lane,
- unsigned &ImplicitSReg) {
+ MachineInstr &MI, unsigned DReg,
+ unsigned Lane, unsigned &ImplicitSReg) {
// If the DPR is defined or used already, the other SPR lane will be chained
// correctly, so there is nothing to be done.
- if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
+ if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
ImplicitSReg = 0;
return true;
}
@@ -4224,7 +4258,7 @@ static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
ImplicitSReg = TRI->getSubReg(DReg,
(Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
MachineBasicBlock::LivenessQueryResult LQR =
- MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
+ MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
if (LQR == MachineBasicBlock::LQR_Live)
return true;
@@ -4237,106 +4271,105 @@ static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
return true;
}
-void
-ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
+void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
+ unsigned Domain) const {
unsigned DstReg, SrcReg, DReg;
unsigned Lane;
- MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
const TargetRegisterInfo *TRI = &getRegisterInfo();
- switch (MI->getOpcode()) {
- default:
- llvm_unreachable("cannot handle opcode!");
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("cannot handle opcode!");
+ break;
+ case ARM::VMOVD:
+ if (Domain != ExeNEON)
break;
- case ARM::VMOVD:
- if (Domain != ExeNEON)
- break;
- // Zap the predicate operands.
- assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+ // Zap the predicate operands.
+ assert(!isPredicated(MI) && "Cannot predicate a VORRd");
- // Make sure we've got NEON instructions.
- assert(Subtarget.hasNEON() && "VORRd requires NEON");
+ // Make sure we've got NEON instructions.
+ assert(Subtarget.hasNEON() && "VORRd requires NEON");
- // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
- for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
- MI->RemoveOperand(i-1);
+ for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
+ MI.RemoveOperand(i - 1);
- // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
- MI->setDesc(get(ARM::VORRd));
- AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
- .addReg(SrcReg)
- .addReg(SrcReg));
+ // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
+ MI.setDesc(get(ARM::VORRd));
+ AddDefaultPred(
+ MIB.addReg(DstReg, RegState::Define).addReg(SrcReg).addReg(SrcReg));
+ break;
+ case ARM::VMOVRS:
+ if (Domain != ExeNEON)
break;
- case ARM::VMOVRS:
- if (Domain != ExeNEON)
- break;
- assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
+ assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
- // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
- for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
- MI->RemoveOperand(i-1);
+ for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
+ MI.RemoveOperand(i - 1);
- DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
+ DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
- // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
- // Note that DSrc has been widened and the other lane may be undef, which
- // contaminates the entire register.
- MI->setDesc(get(ARM::VGETLNi32));
- AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
- .addReg(DReg, RegState::Undef)
- .addImm(Lane));
+ // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
+ // Note that DSrc has been widened and the other lane may be undef, which
+ // contaminates the entire register.
+ MI.setDesc(get(ARM::VGETLNi32));
+ AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
+ .addReg(DReg, RegState::Undef)
+ .addImm(Lane));
- // The old source should be an implicit use, otherwise we might think it
- // was dead before here.
- MIB.addReg(SrcReg, RegState::Implicit);
+ // The old source should be an implicit use, otherwise we might think it
+ // was dead before here.
+ MIB.addReg(SrcReg, RegState::Implicit);
+ break;
+ case ARM::VMOVSR: {
+ if (Domain != ExeNEON)
break;
- case ARM::VMOVSR: {
- if (Domain != ExeNEON)
- break;
- assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
+ assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
- // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
- DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
+ DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
- unsigned ImplicitSReg;
- if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
- break;
+ unsigned ImplicitSReg;
+ if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
+ break;
- for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
- MI->RemoveOperand(i-1);
+ for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
+ MI.RemoveOperand(i - 1);
- // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
- // Again DDst may be undefined at the beginning of this instruction.
- MI->setDesc(get(ARM::VSETLNi32));
- MIB.addReg(DReg, RegState::Define)
- .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
- .addReg(SrcReg)
- .addImm(Lane);
- AddDefaultPred(MIB);
+ // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
+ // Again DDst may be undefined at the beginning of this instruction.
+ MI.setDesc(get(ARM::VSETLNi32));
+ MIB.addReg(DReg, RegState::Define)
+ .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
+ .addReg(SrcReg)
+ .addImm(Lane);
+ AddDefaultPred(MIB);
- // The narrower destination must be marked as set to keep previous chains
- // in place.
- MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
- if (ImplicitSReg != 0)
- MIB.addReg(ImplicitSReg, RegState::Implicit);
- break;
+ // The narrower destination must be marked as set to keep previous chains
+ // in place.
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+ if (ImplicitSReg != 0)
+ MIB.addReg(ImplicitSReg, RegState::Implicit);
+ break;
}
case ARM::VMOVS: {
if (Domain != ExeNEON)
break;
// Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
@@ -4346,16 +4379,16 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
break;
- for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
- MI->RemoveOperand(i-1);
+ for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
+ MI.RemoveOperand(i - 1);
if (DSrc == DDst) {
// Destination can be:
// %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
- MI->setDesc(get(ARM::VDUPLN32d));
+ MI.setDesc(get(ARM::VDUPLN32d));
MIB.addReg(DDst, RegState::Define)
- .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
- .addImm(SrcLane);
+ .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
+ .addImm(SrcLane);
AddDefaultPred(MIB);
// Neither the source or the destination are naturally represented any
@@ -4380,18 +4413,18 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// Pattern of the MachineInstrs is:
// %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
MachineInstrBuilder NewMIB;
- NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- get(ARM::VEXTd32), DDst);
+ NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
+ DDst);
// On the first instruction, both DSrc and DDst may be <undef> if present.
// Specifically when the original instruction didn't have them as an
// <imp-use>.
unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
- bool CurUndef = !MI->readsRegister(CurReg, TRI);
+ bool CurUndef = !MI.readsRegister(CurReg, TRI);
NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
- CurUndef = !MI->readsRegister(CurReg, TRI);
+ CurUndef = !MI.readsRegister(CurReg, TRI);
NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
NewMIB.addImm(1);
@@ -4400,17 +4433,17 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
if (SrcLane == DstLane)
NewMIB.addReg(SrcReg, RegState::Implicit);
- MI->setDesc(get(ARM::VEXTd32));
+ MI.setDesc(get(ARM::VEXTd32));
MIB.addReg(DDst, RegState::Define);
// On the second instruction, DDst has definitely been defined above, so
// it is not <undef>. DSrc, if present, can be <undef> as above.
CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
- CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+ CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
MIB.addReg(CurReg, getUndefRegState(CurUndef));
CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
- CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+ CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
MIB.addReg(CurReg, getUndefRegState(CurUndef));
MIB.addImm(1);
@@ -4446,24 +4479,23 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.
-unsigned ARMBaseInstrInfo::
-getPartialRegUpdateClearance(const MachineInstr *MI,
- unsigned OpNum,
- const TargetRegisterInfo *TRI) const {
- if (!SwiftPartialUpdateClearance ||
- !(Subtarget.isSwift() || Subtarget.isCortexA15()))
+unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
+ const MachineInstr &MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
+ if (!PartialUpdateClearance)
return 0;
assert(TRI && "Need TRI instance");
- const MachineOperand &MO = MI->getOperand(OpNum);
+ const MachineOperand &MO = MI.getOperand(OpNum);
if (MO.readsReg())
return 0;
unsigned Reg = MO.getReg();
int UseOp = -1;
- switch(MI->getOpcode()) {
- // Normal instructions writing only an S-register.
+ switch (MI.getOpcode()) {
+ // Normal instructions writing only an S-register.
case ARM::VLDRS:
case ARM::FCONSTS:
case ARM::VMOVSR:
@@ -4472,7 +4504,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
case ARM::VMOVv2i32:
case ARM::VMOVv2f32:
case ARM::VMOVv1i64:
- UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
+ UseOp = MI.findRegisterUseOperandIdx(Reg, false, TRI);
break;
// Explicitly reads the dependency.
@@ -4485,37 +4517,35 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
// If this instruction actually reads a value from Reg, there is no unwanted
// dependency.
- if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
+ if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
return 0;
// We must be able to clobber the whole D-reg.
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
// Virtual register must be a foo:ssub_0<def,undef> operand.
- if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
+ if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
return 0;
} else if (ARM::SPRRegClass.contains(Reg)) {
// Physical register: MI must define the full D-reg.
unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
&ARM::DPRRegClass);
- if (!DReg || !MI->definesRegister(DReg, TRI))
+ if (!DReg || !MI.definesRegister(DReg, TRI))
return 0;
}
// MI has an unwanted D-register dependency.
// Avoid defs in the previous N instructrions.
- return SwiftPartialUpdateClearance;
+ return PartialUpdateClearance;
}
// Break a partial register dependency after getPartialRegUpdateClearance
// returned non-zero.
-void ARMBaseInstrInfo::
-breakPartialRegDependency(MachineBasicBlock::iterator MI,
- unsigned OpNum,
- const TargetRegisterInfo *TRI) const {
- assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
+void ARMBaseInstrInfo::breakPartialRegDependency(
+ MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
+ assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
assert(TRI && "Need TRI instance");
- const MachineOperand &MO = MI->getOperand(OpNum);
+ const MachineOperand &MO = MI.getOperand(OpNum);
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
"Can't break virtual register dependencies.");
@@ -4528,7 +4558,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
}
assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
- assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
+ assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
// FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
// the full D-register by loading the same value to both lanes. The
@@ -4538,9 +4568,10 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
// Insert the dependency-breaking FCONSTD before MI.
// 96 is the encoding of 0.5, but the actual value doesn't matter here.
- AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- get(ARM::FCONSTD), DReg).addImm(96));
- MI->addRegisterKilled(DReg, TRI, true);
+ AddDefaultPred(
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
+ .addImm(96));
+ MI.addRegisterKilled(DReg, TRI, true);
}
bool ARMBaseInstrInfo::hasNOP() const {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index d80c49494c77..52b0ff17dea2 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -36,8 +36,7 @@ protected:
explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
void expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
- unsigned LoadImmOpc, unsigned LoadOpc,
- Reloc::Model RM) const;
+ unsigned LoadImmOpc, unsigned LoadOpc) const;
/// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI
/// and \p DefIdx.
@@ -93,8 +92,7 @@ protected:
/// non-commutable pair of operand indices OpIdx1 and OpIdx2.
/// Even though the instruction is commutable, the method may still
/// fail to commute the operands, null pointer is returned in such cases.
- MachineInstr *commuteInstructionImpl(MachineInstr *MI,
- bool NewMI,
+ MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned OpIdx1,
unsigned OpIdx2) const override;
@@ -107,7 +105,7 @@ public:
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineBasicBlock::iterator &MBBI,
+ MachineInstr &MI,
LiveVariables *LV) const override;
virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
@@ -122,49 +120,49 @@ public:
const ScheduleDAG *DAG) const override;
// Branch analysis.
- bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify = false) const override;
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- DebugLoc DL) const override;
+ const DebugLoc &DL) const override;
bool
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
// Predication support.
- bool isPredicated(const MachineInstr *MI) const override;
+ bool isPredicated(const MachineInstr &MI) const override;
- ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
- int PIdx = MI->findFirstPredOperandIdx();
- return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+ ARMCC::CondCodes getPredicate(const MachineInstr &MI) const {
+ int PIdx = MI.findFirstPredOperandIdx();
+ return PIdx != -1 ? (ARMCC::CondCodes)MI.getOperand(PIdx).getImm()
: ARMCC::AL;
}
- bool PredicateInstruction(MachineInstr *MI,
- ArrayRef<MachineOperand> Pred) const override;
+ bool PredicateInstruction(MachineInstr &MI,
+ ArrayRef<MachineOperand> Pred) const override;
bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
ArrayRef<MachineOperand> Pred2) const override;
- bool DefinesPredicate(MachineInstr *MI,
+ bool DefinesPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred) const override;
- bool isPredicable(MachineInstr *MI) const override;
+ bool isPredicable(MachineInstr &MI) const override;
/// GetInstSize - Returns the size of the specified MachineInstr.
///
- virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+ virtual unsigned GetInstSizeInBytes(const MachineInstr &MI) const;
- unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
- unsigned isStoreToStackSlot(const MachineInstr *MI,
+ unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
- unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const override;
- unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ unsigned isStoreToStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const override;
void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
@@ -175,7 +173,7 @@ public:
const ARMSubtarget &Subtarget) const;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -190,21 +188,21 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+ bool expandPostRAPseudo(MachineInstr &MI) const override;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig,
+ const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const override;
- MachineInstr *duplicate(MachineInstr *Orig,
+ MachineInstr *duplicate(MachineInstr &Orig,
MachineFunction &MF) const override;
const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
unsigned SubIdx, unsigned State,
const TargetRegisterInfo *TRI) const;
- bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1,
+ bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1,
const MachineRegisterInfo *MRI) const override;
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
@@ -227,7 +225,7 @@ public:
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const override;
- bool isSchedulingBoundary(const MachineInstr *MI,
+ bool isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const override;
@@ -252,7 +250,7 @@ public:
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
- bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
unsigned &SrcReg2, int &CmpMask,
int &CmpValue) const override;
@@ -260,30 +258,29 @@ public:
/// that we can remove a "comparison with zero"; Remove a redundant CMP
/// instruction if the flags can be updated in the same way by an earlier
/// instruction such as SUB.
- bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const override;
- bool analyzeSelect(const MachineInstr *MI,
- SmallVectorImpl<MachineOperand> &Cond,
- unsigned &TrueOp, unsigned &FalseOp,
- bool &Optimizable) const override;
+ bool analyzeSelect(const MachineInstr &MI,
+ SmallVectorImpl<MachineOperand> &Cond, unsigned &TrueOp,
+ unsigned &FalseOp, bool &Optimizable) const override;
- MachineInstr *optimizeSelect(MachineInstr *MI,
+ MachineInstr *optimizeSelect(MachineInstr &MI,
SmallPtrSetImpl<MachineInstr *> &SeenMIs,
bool) const override;
/// FoldImmediate - 'Reg' is known to be defined by a move immediate
/// instruction, try to fold the immediate into the use instruction.
- bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
- unsigned Reg, MachineRegisterInfo *MRI) const override;
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
+ MachineRegisterInfo *MRI) const override;
unsigned getNumMicroOps(const InstrItineraryData *ItinData,
- const MachineInstr *MI) const override;
+ const MachineInstr &MI) const override;
int getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI,
+ const MachineInstr &DefMI, unsigned DefIdx,
+ const MachineInstr &UseMI,
unsigned UseIdx) const override;
int getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
@@ -291,19 +288,20 @@ public:
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
- getExecutionDomain(const MachineInstr *MI) const override;
- void setExecutionDomain(MachineInstr *MI, unsigned Domain) const override;
+ getExecutionDomain(const MachineInstr &MI) const override;
+ void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override;
- unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned,
- const TargetRegisterInfo*) const override;
- void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
+ unsigned
+ getPartialRegUpdateClearance(const MachineInstr &, unsigned,
+ const TargetRegisterInfo *) const override;
+ void breakPartialRegDependency(MachineInstr &, unsigned,
const TargetRegisterInfo *TRI) const override;
/// Get the number of addresses by LDM or VLDM or zero for unknown.
- unsigned getNumLDMAddresses(const MachineInstr *MI) const;
+ unsigned getNumLDMAddresses(const MachineInstr &MI) const;
private:
- unsigned getInstBundleLength(const MachineInstr *MI) const;
+ unsigned getInstBundleLength(const MachineInstr &MI) const;
int getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
@@ -327,10 +325,17 @@ private:
const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const;
- unsigned getPredicationCost(const MachineInstr *MI) const override;
+ int getOperandLatencyImpl(const InstrItineraryData *ItinData,
+ const MachineInstr &DefMI, unsigned DefIdx,
+ const MCInstrDesc &DefMCID, unsigned DefAdj,
+ const MachineOperand &DefMO, unsigned Reg,
+ const MachineInstr &UseMI, unsigned UseIdx,
+ const MCInstrDesc &UseMCID, unsigned UseAdj) const;
+
+ unsigned getPredicationCost(const MachineInstr &MI) const override;
unsigned getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
+ const MachineInstr &MI,
unsigned *PredCost = nullptr) const override;
int getInstrLatency(const InstrItineraryData *ItinData,
@@ -338,19 +343,18 @@ private:
bool hasHighOperandLatency(const TargetSchedModel &SchedModel,
const MachineRegisterInfo *MRI,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI,
+ const MachineInstr &DefMI, unsigned DefIdx,
+ const MachineInstr &UseMI,
unsigned UseIdx) const override;
bool hasLowDefLatency(const TargetSchedModel &SchedModel,
- const MachineInstr *DefMI,
+ const MachineInstr &DefMI,
unsigned DefIdx) const override;
/// verifyInstruction - Perform target specific instruction verification.
- bool verifyInstruction(const MachineInstr *MI,
+ bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
- virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI,
- Reloc::Model RM) const = 0;
+ virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI) const = 0;
void expandMEMCPY(MachineBasicBlock::iterator) const;
@@ -447,7 +451,7 @@ static inline bool isPushOpcode(int Opc) {
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
-ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
unsigned getMatchingCondBranchOpcode(unsigned Opc);
@@ -466,21 +470,24 @@ unsigned convertAddSubFlagsOpcode(unsigned OldOpc);
/// instructions to materializea destreg = basereg + immediate in ARM / Thumb2
/// code.
void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI, DebugLoc dl,
- unsigned DestReg, unsigned BaseReg, int NumBytes,
+ MachineBasicBlock::iterator &MBBI,
+ const DebugLoc &dl, unsigned DestReg,
+ unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI, DebugLoc dl,
- unsigned DestReg, unsigned BaseReg, int NumBytes,
+ MachineBasicBlock::iterator &MBBI,
+ const DebugLoc &dl, unsigned DestReg,
+ unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI, DebugLoc dl,
- unsigned DestReg, unsigned BaseReg,
- int NumBytes, const TargetInstrInfo &TII,
- const ARMBaseRegisterInfo& MRI,
+ MachineBasicBlock::iterator &MBBI,
+ const DebugLoc &dl, unsigned DestReg,
+ unsigned BaseReg, int NumBytes,
+ const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo &MRI,
unsigned MIFlags = 0);
/// Tries to add registers to the reglist of a given base-updating
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index a5207705fc69..aa968efc37d4 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -49,12 +49,9 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo()
: ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {}
static unsigned getFramePointerReg(const ARMSubtarget &STI) {
- if (STI.isTargetMachO()) {
- if (STI.isTargetDarwin() || STI.isThumb1Only())
- return ARM::R7;
- else
- return ARM::R11;
- } else if (STI.isTargetWindows())
+ if (STI.isTargetMachO())
+ return ARM::R7;
+ else if (STI.isTargetWindows())
return ARM::R11;
else // ARM EABI
return STI.isThumb() ? ARM::R7 : ARM::R11;
@@ -63,8 +60,11 @@ static unsigned getFramePointerReg(const ARMSubtarget &STI) {
const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
+ bool UseSplitPush = STI.splitFramePushPop();
const MCPhysReg *RegList =
- STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+ STI.isTargetDarwin()
+ ? CSR_iOS_SaveList
+ : (UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList);
const Function *F = MF->getFunction();
if (F->getCallingConv() == CallingConv::GHC) {
@@ -75,7 +75,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (STI.isMClass()) {
// M-class CPUs have hardware which saves the registers needed to allow a
// function conforming to the AAPCS to function as a handler.
- return CSR_AAPCS_SaveList;
+ return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
} else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") {
// Fast interrupt mode gives the handler a private copy of R8-R14, so less
// need to be saved to restore user-mode state.
@@ -87,6 +87,10 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
}
+ if (STI.isTargetDarwin() && STI.getTargetLowering()->supportSwiftError() &&
+ F->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ return CSR_iOS_SwiftError_SaveList;
+
if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS)
return MF->getInfo<ARMFunctionInfo>()->isSplitCSR()
? CSR_iOS_CXX_TLS_PE_SaveList
@@ -110,6 +114,11 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (CC == CallingConv::GHC)
// This is academic becase all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
+
+ if (STI.isTargetDarwin() && STI.getTargetLowering()->supportSwiftError() &&
+ MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ return CSR_iOS_SwiftError_RegMask;
+
if (STI.isTargetDarwin() && CC == CallingConv::CXX_FAST_TLS)
return CSR_iOS_CXX_TLS_RegMask;
return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
@@ -167,9 +176,8 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ARM::R9);
// Reserve D16-D31 if the subtarget doesn't support them.
if (!STI.hasVFP3() || STI.hasD16()) {
- assert(ARM::D31 == ARM::D16 + 15);
- for (unsigned i = 0; i != 16; ++i)
- Reserved.set(ARM::D16 + i);
+ static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!");
+ Reserved.set(ARM::D16, ARM::D31 + 1);
}
const TargetRegisterClass *RC = &ARM::GPRPairRegClass;
for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I)
@@ -400,13 +408,10 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
-void ARMBaseRegisterInfo::
-emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned SubIdx, int Val,
- ARMCC::CondCodes Pred,
- unsigned PredReg, unsigned MIFlags) const {
+void ARMBaseRegisterInfo::emitLoadConstPool(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ const DebugLoc &dl, unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 6a9a45a65687..1eee94857e05 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -166,12 +166,12 @@ public:
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
- virtual void emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, unsigned DestReg, unsigned SubIdx,
- int Val, ARMCC::CondCodes Pred = ARMCC::AL,
- unsigned PredReg = 0,
- unsigned MIFlags = MachineInstr::NoFlags)const;
+ virtual void
+ emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ const DebugLoc &dl, unsigned DestReg, unsigned SubIdx,
+ int Val, ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags) const;
/// Code Generation virtual methods...
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index a731d00883a1..71b819362404 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -211,7 +211,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
// First consume all registers that would give an unaligned object. Whether
// we go on stack or in regs, no-one will be using them in future.
- unsigned RegAlign = RoundUpToAlignment(Align, 4) / 4;
+ unsigned RegAlign = alignTo(Align, 4) / 4;
while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
State.AllocateReg(RegList[RegIdx++]);
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 847ef87c1b26..edb69581b9d3 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -23,6 +23,12 @@ def CC_ARM_APCS : CallingConv<[
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ // Pass SwiftSelf in a callee saved register.
+ CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
+
+ // A SwiftError is passed in R6.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -42,6 +48,12 @@ def RetCC_ARM_APCS : CallingConv<[
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[f32], CCBitConvertToType<i32>>,
+ // Pass SwiftSelf in a callee saved register.
+ CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
+
+ // A SwiftError is returned in R6.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -151,6 +163,12 @@ def CC_ARM_AAPCS : CallingConv<[
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ // Pass SwiftSelf in a callee saved register.
+ CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
+
+ // A SwiftError is passed in R6.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+
CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
CCDelegateTo<CC_ARM_AAPCS_Common>
@@ -161,6 +179,12 @@ def RetCC_ARM_AAPCS : CallingConv<[
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ // Pass SwiftSelf in a callee saved register.
+ CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
+
+ // A SwiftError is returned in R6.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+
CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
@@ -179,6 +203,12 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ // Pass SwiftSelf in a callee saved register.
+ CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
+
+ // A SwiftError is passed in R6.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+
// HFAs are passed in a contiguous block of registers, or on the stack
CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
@@ -194,6 +224,12 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ // Pass SwiftSelf in a callee saved register.
+ CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
+
+ // A SwiftError is returned in R6.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
@@ -210,6 +246,14 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
(sequence "D%u", 15, 8))>;
+// The order of callee-saved registers needs to match the order we actually push
+// them in FrameLowering, because this order is what's used by
+// PrologEpilogInserter to allocate frame index slots. So when R7 is the frame
+// pointer, we use this AAPCS alternative.
+def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
+ R11, R10, R9, R8,
+ (sequence "D%u", 15, 8))>;
+
// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
// and the pointer return value are both passed in R0 in these cases, this can
// be partially modelled by treating R0 as a callee-saved register
@@ -222,6 +266,9 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
// Also save R7-R4 first to match the stack frame fixed spill areas.
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
+// R6 is used to pass swifterror, remove it from CSR.
+def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R6)>;
+
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
@@ -235,10 +282,11 @@ def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1),
(sequence "D%u", 31, 0))>;
// CSRs that are handled by prologue, epilogue.
-def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR)>;
+def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR, R12, R11, R7, R5, R4)>;
// CSRs that are handled explicitly via copies.
-def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, LR)>;
+def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS,
+ CSR_iOS_CXX_TLS_PE)>;
// The "interrupt" attribute is used to generate code that is acceptable in
// exception-handlers of various kinds. It makes us use a different return
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 55c1684028c2..8511f67dccd5 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -53,6 +53,11 @@ static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
+static cl::opt<unsigned>
+CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30),
+ cl::desc("The max number of iteration for converge"));
+
+
/// UnknownPadding - Return the worst case padding that could result from
/// unknown offset bits. This does not include alignment padding caused by
/// known offset bits.
@@ -274,6 +279,11 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
const char *getPassName() const override {
return "ARM constant island placement and branch shortening pass";
}
@@ -293,10 +303,10 @@ namespace {
unsigned getCombinedIndex(const MachineInstr *CPEMI);
int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
bool findAvailableWater(CPUser&U, unsigned UserOffset,
- water_iterator &WaterIter);
+ water_iterator &WaterIter, bool CloserWater);
void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
MachineBasicBlock *&NewMBB);
- bool handleConstantPoolUser(unsigned CPUserIndex);
+ bool handleConstantPoolUser(unsigned CPUserIndex, bool CloserWater);
void removeDeadCPEMI(MachineInstr *CPEMI);
bool removeUnusedCPEntries();
bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
@@ -456,8 +466,11 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
bool CPChange = false;
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
- CPChange |= handleConstantPoolUser(i);
- if (CPChange && ++NoCPIters > 30)
+ // For most inputs, it converges in no more than 5 iterations.
+ // If it doesn't end in 10, the input may have huge BB or many CPEs.
+ // In this case, we will try different heuristics.
+ CPChange |= handleConstantPoolUser(i, NoCPIters >= CPMaxIteration / 2);
+ if (CPChange && ++NoCPIters > CPMaxIteration)
report_fatal_error("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
@@ -478,10 +491,18 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MadeChange = true;
}
- // Shrink 32-bit Thumb2 branch, load, and store instructions.
+ // Shrink 32-bit Thumb2 load and store instructions.
if (isThumb2 && !STI->prefers32BitThumb())
MadeChange |= optimizeThumb2Instructions();
+ // Shrink 32-bit branch instructions.
+ if (isThumb && STI->hasV8MBaselineOps())
+ MadeChange |= optimizeThumb2Branches();
+
+ // Optimize jump tables using TBB / TBH.
+ if (isThumb2)
+ MadeChange |= optimizeThumb2JumpTables();
+
// After a while, this might be made debug-only, but it is not expensive.
verify();
@@ -654,7 +675,7 @@ bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
// have an unconditional branch for whatever reason.
MachineBasicBlock *TBB, *FBB;
SmallVector<MachineOperand, 4> Cond;
- bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond);
+ bool TooDifficult = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
return TooDifficult || FBB == nullptr;
}
@@ -701,14 +722,10 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
/// information about the sizes of each block and the locations of all
/// the jump tables.
void ARMConstantIslands::scanFunctionJumpTables() {
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I)
- if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
- T2JumpTables.push_back(I);
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &I : MBB)
+ if (I.isBranch() && I.getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(&I);
}
}
@@ -735,22 +752,18 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
adjustBBOffsetsAfter(&MF->front());
// Now go back through the instructions and build up our data structures.
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
-
+ for (MachineBasicBlock &MBB : *MF) {
// If this block doesn't fall through into the next MBB, then this is
// 'water' that a constant pool island could be placed.
if (!BBHasFallthrough(&MBB))
WaterList.push_back(&MBB);
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- if (I->isDebugValue())
+ for (MachineInstr &I : MBB) {
+ if (I.isDebugValue())
continue;
- unsigned Opc = I->getOpcode();
- if (I->isBranch()) {
+ unsigned Opc = I.getOpcode();
+ if (I.isBranch()) {
bool isCond = false;
unsigned Bits = 0;
unsigned Scale = 1;
@@ -759,7 +772,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
default:
continue; // Ignore other JT branches
case ARM::t2BR_JT:
- T2JumpTables.push_back(I);
+ T2JumpTables.push_back(&I);
continue; // Does not get an entry in ImmBranches
case ARM::Bcc:
isCond = true;
@@ -793,11 +806,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// Record this immediate branch.
unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
- ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ ImmBranches.push_back(ImmBranch(&I, MaxOffs, isCond, UOpc));
}
if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
- PushPopMIs.push_back(I);
+ PushPopMIs.push_back(&I);
if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS ||
Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB ||
@@ -805,8 +818,8 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
continue;
// Scan the instructions for constant pool operands.
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
- if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) {
+ for (unsigned op = 0, e = I.getNumOperands(); op != e; ++op)
+ if (I.getOperand(op).isCPI() || I.getOperand(op).isJTI()) {
// We found one. The addressing mode tells us the max displacement
// from the PC that this instruction permits.
@@ -865,15 +878,15 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
}
// Remember that this is a user of a CP entry.
- unsigned CPI = I->getOperand(op).getIndex();
- if (I->getOperand(op).isJTI()) {
+ unsigned CPI = I.getOperand(op).getIndex();
+ if (I.getOperand(op).isJTI()) {
JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
CPI = JumpTableEntryIndices[CPI];
}
MachineInstr *CPEMI = CPEMIs[CPI];
unsigned MaxOffs = ((1 << Bits)-1) * Scale;
- CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
+ CPUsers.push_back(CPUser(&I, CPEMI, MaxOffs, NegOk, IsSoImm));
// Increment corresponding CPEntry reference count.
CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
@@ -896,15 +909,14 @@ void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
BBI.Unalign = 0;
BBI.PostAlign = 0;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I) {
+ for (MachineInstr &I : *MBB) {
BBI.Size += TII->GetInstSizeInBytes(I);
// For inline asm, GetInstSizeInBytes returns a conservative estimate.
// The actual size may be smaller, but still a multiple of the instr size.
- if (I->isInlineAsm())
+ if (I.isInlineAsm())
BBI.Unalign = isThumb ? 1 : 2;
// Also consider instructions that may be shrunk later.
- else if (isThumb && mayOptimizeThumb2Instruction(I))
+ else if (isThumb && mayOptimizeThumb2Instruction(&I))
BBI.Unalign = 1;
}
@@ -929,7 +941,7 @@ unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
// Sum instructions before MI in MBB.
for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
assert(I != MBB->end() && "Didn't find MI in its own basic block?");
- Offset += TII->GetInstSizeInBytes(I);
+ Offset += TII->GetInstSizeInBytes(*I);
}
return Offset;
}
@@ -1108,7 +1120,7 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
Growth = CPEEnd - NextBlockOffset;
// Compute the padding that would go at the end of the CPE to align the next
// block.
- Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+ Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment);
// If the CPE is to be inserted before the instruction, that will raise
// the offset of the instruction. Also account for unknown alignment padding
@@ -1285,11 +1297,27 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
/// move to a lower address, so search backward from the end of the list and
/// prefer the first water that is in range.
bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
- water_iterator &WaterIter) {
+ water_iterator &WaterIter,
+ bool CloserWater) {
if (WaterList.empty())
return false;
unsigned BestGrowth = ~0u;
+ // The nearest water without splitting the UserBB is right after it.
+ // If the distance is still large (we have a big BB), then we need to split it
+ // if we don't converge after a certain number of iterations. This helps the following
+ // situation to converge:
+ // BB0:
+ // Big BB
+ // BB1:
+ // Constant Pool
+ // When a CP access is out of range, BB0 may be used as water. However,
+ // inserting islands between BB0 and BB1 makes other accesses out of range.
+ MachineBasicBlock *UserBB = U.MI->getParent();
+ unsigned MinNoSplitDisp =
+ BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI));
+ if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2)
+ return false;
for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();;
--IP) {
MachineBasicBlock* WaterBB = *IP;
@@ -1301,6 +1329,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
// should be relatively uncommon and when it does happen, we want to be
// sure to take advantage of it for all the CPEs near that block, so that
// we don't insert more branches than necessary.
+ // When CloserWater is true, we try to find the lowest address after (or
+ // equal to) user MI's BB, regardless of padding growth.
unsigned Growth;
if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
@@ -1312,8 +1342,11 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
<< " Growth=" << Growth << '\n');
- // Keep looking unless it is perfect.
- if (BestGrowth == 0)
+ if (CloserWater && WaterBB == U.MI->getParent())
+ return true;
+ // Keep looking unless it is perfect and we're not looking for the lowest
+ // possible address.
+ if (!CloserWater && BestGrowth == 0)
return true;
}
if (IP == B)
@@ -1416,7 +1449,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// iterates at least once.
BaseInsertOffset =
std::max(UserBBI.postOffset() - UPad - 8,
- UserOffset + TII->GetInstSizeInBytes(UserMI) + 1);
+ UserOffset + TII->GetInstSizeInBytes(*UserMI) + 1);
DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
}
unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
@@ -1426,11 +1459,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned CPUIndex = CPUserIndex+1;
unsigned NumCPUsers = CPUsers.size();
MachineInstr *LastIT = nullptr;
- for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ for (unsigned Offset = UserOffset + TII->GetInstSizeInBytes(*UserMI);
Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) {
+ Offset += TII->GetInstSizeInBytes(*MI), MI = std::next(MI)) {
assert(MI != UserMBB->end() && "Fell off end of block");
- if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == &*MI) {
CPUser &U = CPUsers[CPUIndex];
if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
// Shift intertion point by one unit of alignment so it is within reach.
@@ -1447,7 +1480,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// Remember the last IT instruction.
if (MI->getOpcode() == ARM::t2IT)
- LastIT = MI;
+ LastIT = &*MI;
}
--MI;
@@ -1455,23 +1488,24 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// Avoid splitting an IT block.
if (LastIT) {
unsigned PredReg = 0;
- ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
if (CC != ARMCC::AL)
MI = LastIT;
}
// We really must not split an IT block.
DEBUG(unsigned PredReg;
- assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL));
+ assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL));
- NewMBB = splitBlockBeforeInstr(MI);
+ NewMBB = splitBlockBeforeInstr(&*MI);
}
/// handleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range. If so, pick up the constant pool value and move it some
/// place in-range. Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
-bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
+bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
+ bool CloserWater) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
@@ -1494,7 +1528,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
MachineBasicBlock *NewMBB;
water_iterator IP;
- if (findAvailableWater(U, UserOffset, IP)) {
+ if (findAvailableWater(U, UserOffset, IP, CloserWater)) {
DEBUG(dbgs() << "Found water in range\n");
MachineBasicBlock *WaterBB = *IP;
@@ -1584,7 +1618,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
CPEBB->setAlignment(0);
} else
// Entries are sorted by descending alignment, so realign from the front.
- CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+ CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin()));
adjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
@@ -1728,7 +1762,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
splitBlockBeforeInstr(MI);
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
- int delta = TII->GetInstSizeInBytes(&MBB->back());
+ int delta = TII->GetInstSizeInBytes(MBB->back());
BBInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
// BBInfo[SplitBB].Offset is wrong temporarily, fixed below
@@ -1744,18 +1778,18 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
- BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back());
if (isThumb)
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
.addImm(ARMCC::AL).addReg(0);
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(*MI);
MI->eraseFromParent();
adjustBBOffsetsAfter(MBB);
return true;
@@ -1852,8 +1886,6 @@ bool ARMConstantIslands::optimizeThumb2Instructions() {
}
}
- MadeChange |= optimizeThumb2Branches();
- MadeChange |= optimizeThumb2JumpTables();
return MadeChange;
}
@@ -1910,7 +1942,7 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
NewOpc = 0;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg);
if (Pred == ARMCC::EQ)
NewOpc = ARM::tCBZ;
else if (Pred == ARMCC::NE)
@@ -1928,7 +1960,7 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
--CmpMI;
if (CmpMI->getOpcode() == ARM::tCMPi8) {
unsigned Reg = CmpMI->getOperand(0).getReg();
- Pred = getInstrPredicate(CmpMI, PredReg);
+ Pred = getInstrPredicate(*CmpMI, PredReg);
if (Pred == ARMCC::AL &&
CmpMI->getOperand(1).getImm() == 0 &&
isARMLowRegister(Reg)) {
@@ -2170,8 +2202,8 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
}
}
- unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
- unsigned OrigSize = TII->GetInstSizeInBytes(MI);
+ unsigned NewSize = TII->GetInstSizeInBytes(*NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(*MI);
MI->eraseFromParent();
int Delta = OrigSize - NewSize + DeadSize;
@@ -2240,13 +2272,13 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
MachineFunction::iterator OldPrior = std::prev(BBi);
// If the block terminator isn't analyzable, don't try to move the block
- bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
+ bool B = TII->analyzeBranch(*BB, TBB, FBB, Cond);
// If the block ends in an unconditional branch, move it. The prior block
// has to have an analyzable terminator for us to move this one. Be paranoid
// and make sure we're not trying to move the entry block of the function.
- if (!B && Cond.empty() && BB != MF->begin() &&
- !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
+ if (!B && Cond.empty() && BB != &MF->front() &&
+ !TII->analyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
BB->moveAfter(JTBB);
OldPrior->updateTerminator();
BB->updateTerminator();
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index c9849b2605ea..c0db001cb6f1 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -50,11 +50,18 @@ const char *ARMConstantPoolValue::getModifierText() const {
switch (Modifier) {
// FIXME: Are these case sensitive? It'd be nice to lower-case all the
// strings if that's legal.
- case ARMCP::no_modifier: return "none";
- case ARMCP::TLSGD: return "tlsgd";
- case ARMCP::GOT_PREL: return "GOT_PREL";
- case ARMCP::GOTTPOFF: return "gottpoff";
- case ARMCP::TPOFF: return "tpoff";
+ case ARMCP::no_modifier:
+ return "none";
+ case ARMCP::TLSGD:
+ return "tlsgd";
+ case ARMCP::GOT_PREL:
+ return "GOT_PREL";
+ case ARMCP::GOTTPOFF:
+ return "gottpoff";
+ case ARMCP::TPOFF:
+ return "tpoff";
+ case ARMCP::SECREL:
+ return "secrel32";
}
llvm_unreachable("Unknown modifier!");
}
@@ -74,9 +81,9 @@ bool
ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
if (ACPV->Kind == Kind &&
ACPV->PCAdjust == PCAdjust &&
- ACPV->Modifier == Modifier) {
- if (ACPV->LabelId == LabelId)
- return true;
+ ACPV->Modifier == Modifier &&
+ ACPV->LabelId == LabelId &&
+ ACPV->AddCurrentAddress == AddCurrentAddress) {
// Two PC relative constpool entries containing the same GV address or
// external symbols. FIXME: What about blockaddress?
if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol)
@@ -85,7 +92,7 @@ ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
return false;
}
-void ARMConstantPoolValue::dump() const {
+LLVM_DUMP_METHOD void ARMConstantPoolValue::dump() const {
errs() << " " << *this;
}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 6b18a4e52878..c07331d71dad 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -37,11 +37,12 @@ namespace ARMCP {
};
enum ARMCPModifier {
- no_modifier,
- TLSGD,
- GOT_PREL,
- GOTTPOFF,
- TPOFF
+ no_modifier, ///< None
+ TLSGD, ///< Thread Local Storage (General Dynamic Mode)
+ GOT_PREL, ///< Global Offset Table, PC Relative
+ GOTTPOFF, ///< Global Offset Table, Thread Pointer Offset
+ TPOFF, ///< Thread Pointer Offset
+ SECREL, ///< Section Relative (Windows TLS)
};
}
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 56f3498e1204..56f5728ecfb8 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -20,6 +20,7 @@
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -50,6 +51,11 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
const char *getPassName() const override {
return "ARM pseudo instruction expansion pass";
}
@@ -58,7 +64,8 @@ namespace {
void TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
bool ExpandMI(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI);
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
bool ExpandMBB(MachineBasicBlock &MBB);
void ExpandVLD(MachineBasicBlock::iterator &MBBI);
void ExpandVST(MachineBasicBlock::iterator &MBBI);
@@ -67,6 +74,14 @@ namespace {
unsigned Opc, bool IsExt);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
+ bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
+ unsigned StrexOp, unsigned UxtOp,
+ MachineBasicBlock::iterator &NextMBBI);
+
+ bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
};
char ARMExpandPseudo::ID = 0;
}
@@ -651,7 +666,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
@@ -737,8 +752,242 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MI.eraseFromParent();
}
+static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
+ for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
+ MBB->addLiveIn(*I);
+}
+
+/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
+/// possible. This only gets used at -O0 so we don't care about efficiency of the
+/// generated code.
+bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned LdrexOp, unsigned StrexOp,
+ unsigned UxtOp,
+ MachineBasicBlock::iterator &NextMBBI) {
+ bool IsThumb = STI->isThumb();
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineOperand &Dest = MI.getOperand(0);
+ unsigned StatusReg = MI.getOperand(1).getReg();
+ MachineOperand &Addr = MI.getOperand(2);
+ MachineOperand &Desired = MI.getOperand(3);
+ MachineOperand &New = MI.getOperand(4);
+
+ LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+ LiveRegs.addLiveOuts(MBB);
+ for (auto I = std::prev(MBB.end()); I != MBBI; --I)
+ LiveRegs.stepBackward(*I);
+
+ MachineFunction *MF = MBB.getParent();
+ auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ MF->insert(++MBB.getIterator(), LoadCmpBB);
+ MF->insert(++LoadCmpBB->getIterator(), StoreBB);
+ MF->insert(++StoreBB->getIterator(), DoneBB);
+
+ if (UxtOp) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII->get(UxtOp), Desired.getReg())
+ .addReg(Desired.getReg(), RegState::Kill);
+ if (!IsThumb)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
+ }
+
+ // .Lloadcmp:
+ // ldrex rDest, [rAddr]
+ // cmp rDest, rDesired
+ // bne .Ldone
+ LoadCmpBB->addLiveIn(Addr.getReg());
+ LoadCmpBB->addLiveIn(Dest.getReg());
+ LoadCmpBB->addLiveIn(Desired.getReg());
+ addPostLoopLiveIns(LoadCmpBB, LiveRegs);
+
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
+ MIB.addReg(Addr.getReg());
+ if (LdrexOp == ARM::t2LDREX)
+ MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
+ AddDefaultPred(MIB);
+
+ unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
+ AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
+ .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
+ .addOperand(Desired));
+ unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
+ BuildMI(LoadCmpBB, DL, TII->get(Bcc))
+ .addMBB(DoneBB)
+ .addImm(ARMCC::NE)
+ .addReg(ARM::CPSR, RegState::Kill);
+ LoadCmpBB->addSuccessor(DoneBB);
+ LoadCmpBB->addSuccessor(StoreBB);
+
+ // .Lstore:
+ // strex rStatus, rNew, [rAddr]
+ // cmp rStatus, #0
+ // bne .Lloadcmp
+ StoreBB->addLiveIn(Addr.getReg());
+ StoreBB->addLiveIn(New.getReg());
+ addPostLoopLiveIns(StoreBB, LiveRegs);
+
+
+ MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg);
+ MIB.addOperand(New);
+ MIB.addOperand(Addr);
+ if (StrexOp == ARM::t2STREX)
+ MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
+ AddDefaultPred(MIB);
+
+ unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
+ AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
+ .addReg(StatusReg, RegState::Kill)
+ .addImm(0));
+ BuildMI(StoreBB, DL, TII->get(Bcc))
+ .addMBB(LoadCmpBB)
+ .addImm(ARMCC::NE)
+ .addReg(ARM::CPSR, RegState::Kill);
+ StoreBB->addSuccessor(LoadCmpBB);
+ StoreBB->addSuccessor(DoneBB);
+
+ DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+ DoneBB->transferSuccessors(&MBB);
+ addPostLoopLiveIns(DoneBB, LiveRegs);
+
+ MBB.addSuccessor(LoadCmpBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+ return true;
+}
+
+/// ARM's ldrexd/strexd take a consecutive register pair (represented as a
+/// single GPRPair register), Thumb's take two separate registers so we need to
+/// extract the subregs from the pair.
+static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
+ unsigned Flags, bool IsThumb,
+ const TargetRegisterInfo *TRI) {
+ if (IsThumb) {
+ unsigned RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
+ unsigned RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
+ MIB.addReg(RegLo, Flags | getKillRegState(Reg.isDead()));
+ MIB.addReg(RegHi, Flags | getKillRegState(Reg.isDead()));
+ } else
+ MIB.addReg(Reg.getReg(), Flags | getKillRegState(Reg.isDead()));
+}
+
+/// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
+bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ bool IsThumb = STI->isThumb();
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineOperand &Dest = MI.getOperand(0);
+ unsigned StatusReg = MI.getOperand(1).getReg();
+ MachineOperand &Addr = MI.getOperand(2);
+ MachineOperand &Desired = MI.getOperand(3);
+ MachineOperand &New = MI.getOperand(4);
+
+ unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
+ unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
+ unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0);
+ unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1);
+
+ LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+ LiveRegs.addLiveOuts(MBB);
+ for (auto I = std::prev(MBB.end()); I != MBBI; --I)
+ LiveRegs.stepBackward(*I);
+
+ MachineFunction *MF = MBB.getParent();
+ auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ MF->insert(++MBB.getIterator(), LoadCmpBB);
+ MF->insert(++LoadCmpBB->getIterator(), StoreBB);
+ MF->insert(++StoreBB->getIterator(), DoneBB);
+
+ // .Lloadcmp:
+ // ldrexd rDestLo, rDestHi, [rAddr]
+ // cmp rDestLo, rDesiredLo
+ // sbcs rStatus<dead>, rDestHi, rDesiredHi
+ // bne .Ldone
+ LoadCmpBB->addLiveIn(Addr.getReg());
+ LoadCmpBB->addLiveIn(Dest.getReg());
+ LoadCmpBB->addLiveIn(Desired.getReg());
+ addPostLoopLiveIns(LoadCmpBB, LiveRegs);
+
+ unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
+ addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
+ MIB.addReg(Addr.getReg());
+ AddDefaultPred(MIB);
+
+ unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
+ AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
+ .addReg(DestLo, getKillRegState(Dest.isDead()))
+ .addReg(DesiredLo, getKillRegState(Desired.isDead())));
+
+ unsigned SBCrr = IsThumb ? ARM::t2SBCrr : ARM::SBCrr;
+ MIB = BuildMI(LoadCmpBB, DL, TII->get(SBCrr))
+ .addReg(StatusReg, RegState::Define | RegState::Dead)
+ .addReg(DestHi, getKillRegState(Dest.isDead()))
+ .addReg(DesiredHi, getKillRegState(Desired.isDead()));
+ AddDefaultPred(MIB);
+ MIB.addReg(ARM::CPSR, RegState::Kill);
+
+ unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
+ BuildMI(LoadCmpBB, DL, TII->get(Bcc))
+ .addMBB(DoneBB)
+ .addImm(ARMCC::NE)
+ .addReg(ARM::CPSR, RegState::Kill);
+ LoadCmpBB->addSuccessor(DoneBB);
+ LoadCmpBB->addSuccessor(StoreBB);
+
+ // .Lstore:
+ // strexd rStatus, rNewLo, rNewHi, [rAddr]
+ // cmp rStatus, #0
+ // bne .Lloadcmp
+ StoreBB->addLiveIn(Addr.getReg());
+ StoreBB->addLiveIn(New.getReg());
+ addPostLoopLiveIns(StoreBB, LiveRegs);
+
+ unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
+ MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg);
+ addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
+ MIB.addOperand(Addr);
+ AddDefaultPred(MIB);
+
+ unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
+ AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
+ .addReg(StatusReg, RegState::Kill)
+ .addImm(0));
+ BuildMI(StoreBB, DL, TII->get(Bcc))
+ .addMBB(LoadCmpBB)
+ .addImm(ARMCC::NE)
+ .addReg(ARM::CPSR, RegState::Kill);
+ StoreBB->addSuccessor(LoadCmpBB);
+ StoreBB->addSuccessor(DoneBB);
+
+ DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+ DoneBB->transferSuccessors(&MBB);
+ addPostLoopLiveIns(DoneBB, LiveRegs);
+
+ MBB.addSuccessor(LoadCmpBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+ return true;
+}
+
+
bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI) {
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
@@ -784,7 +1033,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addReg(JumpTarget.getReg(), RegState::Kill);
}
- MachineInstr *NewMI = std::prev(MBBI);
+ auto NewMI = std::prev(MBBI);
for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
NewMI->addOperand(MBBI->getOperand(i));
@@ -1375,6 +1624,30 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
+
+ case ARM::CMP_SWAP_8:
+ if (STI->isThumb())
+ return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB,
+ ARM::tUXTB, NextMBBI);
+ else
+ return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB,
+ ARM::UXTB, NextMBBI);
+ case ARM::CMP_SWAP_16:
+ if (STI->isThumb())
+ return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH,
+ ARM::tUXTH, NextMBBI);
+ else
+ return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH,
+ ARM::UXTH, NextMBBI);
+ case ARM::CMP_SWAP_32:
+ if (STI->isThumb())
+ return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0,
+ NextMBBI);
+ else
+ return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI);
+
+ case ARM::CMP_SWAP_64:
+ return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
}
}
@@ -1384,7 +1657,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
MachineBasicBlock::iterator NMBBI = std::next(MBBI);
- Modified |= ExpandMI(MBB, MBBI);
+ Modified |= ExpandMI(MBB, MBBI, NMBBI);
MBBI = NMBBI;
}
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index ff2fcfa349dc..13724da5d4f7 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -22,7 +22,6 @@
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -41,7 +40,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -110,11 +108,6 @@ class ARMFastISel final : public FastISel {
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
- unsigned fastEmitInst_rrr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- unsigned Op2, bool Op2IsKill);
unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
@@ -164,6 +157,7 @@ class ARMFastISel final : public FastISel {
// Utility routines.
private:
+ bool isPositionIndependent() const;
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
@@ -215,7 +209,7 @@ class ARMFastISel final : public FastISel {
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
void AddLoadStoreOperands(MVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
- unsigned Flags, bool useAM3);
+ MachineMemOperand::Flags Flags, bool useAM3);
};
} // end anonymous namespace
@@ -331,38 +325,6 @@ unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- unsigned Op2, bool Op2IsKill) {
- unsigned ResultReg = createResultReg(RC);
- const MCInstrDesc &II = TII.get(MachineInstOpcode);
-
- // Make sure the input operands are sufficiently constrained to be legal
- // for this instruction.
- Op0 = constrainOperandRegClass(II, Op0, 1);
- Op1 = constrainOperandRegClass(II, Op1, 2);
- Op2 = constrainOperandRegClass(II, Op1, 3);
-
- if (II.getNumDefs() >= 1) {
- AddOptionalDefs(
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addReg(Op2, Op2IsKill * RegState::Kill));
- } else {
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addReg(Op2, Op2IsKill * RegState::Kill));
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(II.ImplicitDefs[0]));
- }
- return ResultReg;
-}
-
unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
@@ -576,12 +538,15 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
return ResultReg;
}
+bool ARMFastISel::isPositionIndependent() const {
+ return TLI.isPositionIndependent();
+}
+
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// For now 32-bit only.
if (VT != MVT::i32 || GV->isThreadLocal()) return 0;
- Reloc::Model RelocM = TM.getRelocationModel();
- bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
+ bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
: &ARM::GPRRegClass;
unsigned DestReg = createResultReg(RC);
@@ -591,23 +556,20 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
bool IsThreadLocal = GVar && GVar->isThreadLocal();
if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;
+ bool IsPositionIndependent = isPositionIndependent();
// Use movw+movt when possible, it avoids constant pool entries.
// Non-darwin targets only support static movt relocations in FastISel.
if (Subtarget->useMovt(*FuncInfo.MF) &&
- (Subtarget->isTargetMachO() || RelocM == Reloc::Static)) {
+ (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
unsigned Opc;
unsigned char TF = 0;
if (Subtarget->isTargetMachO())
TF = ARMII::MO_NONLAZY;
- switch (RelocM) {
- case Reloc::PIC_:
+ if (IsPositionIndependent)
Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
- break;
- default:
+ else
Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
- break;
- }
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
} else {
@@ -618,12 +580,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
Align = DL.getTypeAllocSize(GV->getType());
}
- if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
+ if (Subtarget->isTargetELF() && IsPositionIndependent)
return ARMLowerPICELF(GV, Align, VT);
// Grab index.
- unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
- (Subtarget->isThumb() ? 4 : 8);
+ unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
unsigned Id = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
ARMCP::CPValue,
@@ -633,10 +594,10 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// Load value.
MachineInstrBuilder MIB;
if (isThumb2) {
- unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+ unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
DestReg).addConstantPoolIndex(Idx);
- if (RelocM == Reloc::PIC_)
+ if (IsPositionIndependent)
MIB.addImm(Id);
AddOptionalDefs(MIB);
} else {
@@ -648,7 +609,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
.addImm(0);
AddOptionalDefs(MIB);
- if (RelocM == Reloc::PIC_) {
+ if (IsPositionIndependent) {
unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
@@ -912,7 +873,8 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
- unsigned Flags, bool useAM3) {
+ MachineMemOperand::Flags Flags,
+ bool useAM3) {
// addrmode5 output depends on the selection dag addressing dividing the
// offset by 4 that it then later multiplies. Do this here as well.
if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
@@ -931,7 +893,7 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
// ARM halfword load/stores and signed byte loads need an additional
// operand.
if (useAM3) {
- signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
+ int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
MIB.addReg(0);
MIB.addImm(Imm);
} else {
@@ -945,7 +907,7 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
// ARM halfword load/stores and signed byte loads need an additional
// operand.
if (useAM3) {
- signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
+ int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
MIB.addReg(0);
MIB.addImm(Imm);
} else {
@@ -1062,6 +1024,21 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
if (cast<LoadInst>(I)->isAtomic())
return false;
+ const Value *SV = I->getOperand(0);
+ if (TLI.supportSwiftError()) {
+ // Swifterror values can come from either a function parameter with
+ // swifterror attribute or an alloca with swifterror attribute.
+ if (const Argument *Arg = dyn_cast<Argument>(SV)) {
+ if (Arg->hasSwiftErrorAttr())
+ return false;
+ }
+
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
+ if (Alloca->isSwiftError())
+ return false;
+ }
+ }
+
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(I->getType(), VT))
@@ -1177,6 +1154,21 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
if (cast<StoreInst>(I)->isAtomic())
return false;
+ const Value *PtrV = I->getOperand(1);
+ if (TLI.supportSwiftError()) {
+ // Swifterror values can come from either a function parameter with
+ // swifterror attribute or an alloca with swifterror attribute.
+ if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
+ if (Arg->hasSwiftErrorAttr())
+ return false;
+ }
+
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
+ if (Alloca->isSwiftError())
+ return false;
+ }
+ }
+
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
@@ -1726,6 +1718,13 @@ bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
if (!isTypeLegal(Ty, VT))
return false;
+ // Many ABIs do not provide a libcall for standalone remainder, so we need to
+ // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
+ // multi-reg returns, we'll have to bail out.
+ if (!TLI.hasStandaloneRem(VT)) {
+ return false;
+ }
+
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i8)
LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
@@ -1847,6 +1846,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
}
// Fallthrough
case CallingConv::C:
+ case CallingConv::CXX_FAST_TLS:
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() &&
@@ -1858,6 +1858,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
}
case CallingConv::ARM_AAPCS_VFP:
+ case CallingConv::Swift:
if (!isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
// Fall through to soft float variant, variadic functions don't
@@ -2083,6 +2084,10 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (TLI.supportSwiftError() &&
+ F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ return false;
+
if (TLI.supportSplitCSR(FuncInfo.MF))
return false;
@@ -2295,8 +2300,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// TODO: Avoid some calling conventions?
- PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ FunctionType *FTy = CS.getFunctionType();
bool isVarArg = FTy->isVarArg();
// Handle *simple* calls for now.
@@ -2345,6 +2349,8 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// FIXME: Only handle *easy* calls for now.
if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::SwiftSelf) ||
+ CS.paramHasAttr(AttrInd, Attribute::SwiftError) ||
CS.paramHasAttr(AttrInd, Attribute::Nest) ||
CS.paramHasAttr(AttrInd, Attribute::ByVal))
return false;
@@ -2394,22 +2400,15 @@ bool ARMFastISel::SelectCall(const Instruction *I,
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DbgLoc, TII.get(CallOpc));
- unsigned char OpFlags = 0;
-
- // Add MO_PLT for global address or external symbol in the PIC relocation
- // model.
- if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_)
- OpFlags = ARMII::MO_PLT;
-
// ARM calls don't take a predicate, but tBL / tBLX do.
if(isThumb2)
AddDefaultPred(MIB);
if (UseReg)
MIB.addReg(CalleeReg);
else if (!IntrMemName)
- MIB.addGlobalAddress(GV, 0, OpFlags);
+ MIB.addGlobalAddress(GV, 0, 0);
else
- MIB.addExternalSymbol(IntrMemName, OpFlags);
+ MIB.addExternalSymbol(IntrMemName, 0);
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
@@ -2942,8 +2941,7 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
unsigned Align, MVT VT) {
- bool UseGOT_PREL =
- !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
+ bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
LLVMContext *Context = &MF->getFunction()->getContext();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
@@ -3006,6 +3004,7 @@ bool ARMFastISel::fastLowerArguments() {
case CallingConv::ARM_AAPCS_VFP:
case CallingConv::ARM_AAPCS:
case CallingConv::ARM_APCS:
+ case CallingConv::Swift:
break;
}
@@ -3019,6 +3018,8 @@ bool ARMFastISel::fastLowerArguments() {
if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) ||
F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
return false;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index c5990bb7d1fb..e8c9f610ea64 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -98,35 +98,32 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
-static bool isCSRestore(MachineInstr *MI,
- const ARMBaseInstrInfo &TII,
+static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
const MCPhysReg *CSRegs) {
// Integer spill area is handled with "pop".
- if (isPopOpcode(MI->getOpcode())) {
+ if (isPopOpcode(MI.getOpcode())) {
// The first two operands are predicates. The last two are
// imp-def and imp-use of SP. Check everything in between.
- for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
- if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ for (int i = 5, e = MI.getNumOperands(); i != e; ++i)
+ if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
return false;
return true;
}
- if ((MI->getOpcode() == ARM::LDR_POST_IMM ||
- MI->getOpcode() == ARM::LDR_POST_REG ||
- MI->getOpcode() == ARM::t2LDR_POST) &&
- isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
- MI->getOperand(1).getReg() == ARM::SP)
+ if ((MI.getOpcode() == ARM::LDR_POST_IMM ||
+ MI.getOpcode() == ARM::LDR_POST_REG ||
+ MI.getOpcode() == ARM::t2LDR_POST) &&
+ isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) &&
+ MI.getOperand(1).getReg() == ARM::SP)
return true;
return false;
}
-static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI, DebugLoc dl,
- const ARMBaseInstrInfo &TII, unsigned DestReg,
- unsigned SrcReg, int NumBytes,
- unsigned MIFlags = MachineInstr::NoFlags,
- ARMCC::CondCodes Pred = ARMCC::AL,
- unsigned PredReg = 0) {
+static void emitRegPlusImmediate(
+ bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
+ unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
if (isARM)
emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
Pred, PredReg, TII, MIFlags);
@@ -136,7 +133,7 @@ static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB,
}
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
const ARMBaseInstrInfo &TII, int NumBytes,
unsigned MIFlags = MachineInstr::NoFlags,
ARMCC::CondCodes Pred = ARMCC::AL,
@@ -145,9 +142,9 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
MIFlags, Pred, PredReg);
}
-static int sizeOfSPAdjustment(const MachineInstr *MI) {
+static int sizeOfSPAdjustment(const MachineInstr &MI) {
int RegSize;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
case ARM::VSTMDDB_UPD:
RegSize = 8;
break;
@@ -165,7 +162,7 @@ static int sizeOfSPAdjustment(const MachineInstr *MI) {
int count = 0;
// ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
// pred) so the list starts at 4.
- for (int i = MI->getNumOperands() - 1; i >= 4; --i)
+ for (int i = MI.getNumOperands() - 1; i >= 4; --i)
count += RegSize;
return count;
}
@@ -206,7 +203,8 @@ struct StackAdjustingInsts {
}
void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB,
- DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) {
+ const DebugLoc &dl, const ARMBaseInstrInfo &TII,
+ bool HasFP) {
unsigned CFAOffset = 0;
for (auto &Info : Insts) {
if (HasFP && !Info.BeforeFPSet)
@@ -235,7 +233,7 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
const TargetInstrInfo &TII,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc DL, const unsigned Reg,
+ const DebugLoc &DL, const unsigned Reg,
const unsigned Alignment,
const bool MustBeSingleInstruction) {
const ARMSubtarget &AST =
@@ -355,7 +353,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.isTargetDarwin()) {
+ if (STI.splitFramePushPop()) {
GPRCS2Size += 4;
break;
}
@@ -416,7 +414,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// .cfi_offset operations will reflect that.
if (DPRGapSize) {
assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
- if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize))
+ if (tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
else {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
@@ -430,7 +428,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
- DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI));
+ DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
LastPush = MBBI++;
}
}
@@ -485,7 +483,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr),
ARM::SP)
- .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP, RegState::Kill)
.addReg(ARM::R4, RegState::Kill)
.setMIFlags(MachineInstr::FrameSetup)));
NumBytes = 0;
@@ -494,7 +492,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
if (NumBytes) {
// Adjust SP after all the callee-save spills.
if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
- tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
+ tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
else {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
@@ -522,7 +520,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// that push.
if (HasFP) {
MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
- unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push);
+ unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
dl, TII, FramePtr, ARM::SP,
PushSize + FramePtrOffsetInPush,
@@ -559,7 +557,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.isTargetDarwin())
+ if (STI.splitFramePushPop())
break;
// fallthrough
case ARM::R0:
@@ -592,7 +590,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.isTargetDarwin()) {
+ if (STI.splitFramePushPop()) {
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
unsigned Offset = MFI->getObjectOffset(FI);
unsigned CFIIndex = MMI.addFrameInst(
@@ -727,8 +725,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (MBBI != MBB.begin()) {
do {
--MBBI;
- } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
- if (!isCSRestore(MBBI, TII, CSRegs))
+ } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs));
+ if (!isCSRestore(*MBBI, TII, CSRegs))
++MBBI;
}
@@ -774,8 +772,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(FramePtr));
}
} else if (NumBytes &&
- !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes))
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+ !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Increment past our save areas.
if (AFI->getDPRCalleeSavedAreaSize()) {
@@ -904,33 +902,27 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned LastReg = 0;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+ if (!(Func)(Reg, STI.splitFramePushPop())) continue;
// D-registers in the aligned area DPRCS2 are NOT spilled here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
continue;
- // Add the callee-saved register as live-in unless it's LR and
- // @llvm.returnaddress is called. If LR is returned for
- // @llvm.returnaddress then it's already added to the function and
- // entry block live-in sets.
- bool isKill = true;
- if (Reg == ARM::LR) {
- if (MF.getFrameInfo()->isReturnAddressTaken() &&
- MF.getRegInfo().isLiveIn(Reg))
- isKill = false;
- }
-
- if (isKill)
+ bool isLiveIn = MF.getRegInfo().isLiveIn(Reg);
+ if (!isLiveIn)
MBB.addLiveIn(Reg);
-
// If NoGap is true, push consecutive registers and then leave the rest
// for other instructions. e.g.
// vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
if (NoGap && LastReg && LastReg != Reg-1)
break;
LastReg = Reg;
- Regs.push_back(std::make_pair(Reg, isKill));
+ // Do not set a kill flag on values that are also marked as live-in. This
+ // happens with the @llvm.returnaddress intrinsic and with arguments
+ // passed in callee saved registers.
+ // Omitting the kill flags is conservatively correct even if the live-in
+ // is not used after all.
+ Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
}
if (Regs.empty())
@@ -991,7 +983,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool DeleteRet = false;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+ if (!(Func)(Reg, STI.splitFramePushPop())) continue;
// The aligned reloads from area DPRCS2 are not inserted here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1027,7 +1019,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
if (DeleteRet && MI != MBB.end()) {
- MIB.copyImplicitOps(&*MI);
+ MIB.copyImplicitOps(*MI);
MI->eraseFromParent();
}
MI = MIB;
@@ -1367,7 +1359,7 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
unsigned FnSize = 0;
for (auto &MBB : MF) {
for (auto &MI : MBB)
- FnSize += TII.GetInstSizeInBytes(&MI);
+ FnSize += TII.GetInstSizeInBytes(MI);
}
return FnSize;
}
@@ -1485,6 +1477,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
bool CS1Spilled = false;
bool LRSpilled = false;
unsigned NumGPRSpills = 0;
+ unsigned NumFPRSpills = 0;
SmallVector<unsigned, 4> UnspilledCS1GPRs;
SmallVector<unsigned, 4> UnspilledCS2GPRs;
const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
@@ -1539,13 +1532,22 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
CanEliminateFrame = false;
}
- if (!ARM::GPRRegClass.contains(Reg))
+ if (!ARM::GPRRegClass.contains(Reg)) {
+ if (Spilled) {
+ if (ARM::SPRRegClass.contains(Reg))
+ NumFPRSpills++;
+ else if (ARM::DPRRegClass.contains(Reg))
+ NumFPRSpills += 2;
+ else if (ARM::QPRRegClass.contains(Reg))
+ NumFPRSpills += 4;
+ }
continue;
+ }
if (Spilled) {
NumGPRSpills++;
- if (!STI.isTargetDarwin()) {
+ if (!STI.splitFramePushPop()) {
if (Reg == ARM::LR)
LRSpilled = true;
CS1Spilled = true;
@@ -1567,7 +1569,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
break;
}
} else {
- if (!STI.isTargetDarwin()) {
+ if (!STI.splitFramePushPop()) {
UnspilledCS1GPRs.push_back(Reg);
continue;
}
@@ -1613,12 +1615,21 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// FIXME: We could add logic to be more precise about negative offsets
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
- bool BigStack = (RS && (MFI->estimateStackSize(MF) +
- ((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >=
- estimateRSStackSizeLimit(MF, this))) ||
+ unsigned EstimatedStackSize =
+ MFI->estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
+ if (hasFP(MF)) {
+ if (AFI->hasStackFrame())
+ EstimatedStackSize += 4;
+ } else {
+ // If FP is not used, SP will be used to access arguments, so include
+ // the argument size in the stack size estimate.
+ EstimatedStackSize += MF.getInfo<ARMFunctionInfo>()->getArgumentStackSize();
+ }
+ EstimatedStackSize += 16; // For possible paddings.
+
+ bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) ||
MFI->hasVarSizedObjects() ||
(MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
-
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
@@ -1712,6 +1723,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
} else if (!AFI->isThumb1OnlyFunction()) {
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
// closest to SP or frame pointer.
+ assert(RS && "Register scavenging not provided");
const TargetRegisterClass *RC = &ARM::GPRRegClass;
RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
@@ -1726,19 +1738,18 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
-
-void ARMFrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
+MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
const ARMBaseInstrInfo &TII =
*static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (!hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
// ADJCALLSTACKUP -> add, sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc dl = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
+ MachineInstr &Old = *I;
+ DebugLoc dl = Old.getDebugLoc();
+ unsigned Amount = Old.getOperand(0).getImm();
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
@@ -1751,25 +1762,26 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
bool isARM = !AFI->isThumbFunction();
// Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
- int PIdx = Old->findFirstPredOperandIdx();
- ARMCC::CondCodes Pred = (PIdx == -1)
- ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
+ unsigned Opc = Old.getOpcode();
+ int PIdx = Old.findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred =
+ (PIdx == -1) ? ARMCC::AL
+ : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
// Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
- unsigned PredReg = Old->getOperand(2).getReg();
+ unsigned PredReg = Old.getOperand(2).getReg();
emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
Pred, PredReg);
} else {
// Note: PredReg is operand 3 for ADJCALLSTACKUP.
- unsigned PredReg = Old->getOperand(3).getReg();
+ unsigned PredReg = Old.getOperand(3).getReg();
assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
Pred, PredReg);
}
}
}
- MBB.erase(I);
+ return MBB.erase(I);
}
/// Get the minimum constant for ARM that is greater than or equal to the
@@ -2162,7 +2174,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
PrevStackMBB->addSuccessor(McrMBB);
-#ifdef XDEBUG
+#ifdef EXPENSIVE_CHECKS
MF.verify();
#endif
}
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 66f4dfb6ef52..21cd78da395c 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -74,7 +74,7 @@ public:
bool(*Func)(unsigned, bool),
unsigned NumAlignedDPRCS2Regs) const;
- void
+ MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 0157c0a35286..0d904ecb6296 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -50,8 +50,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
// Skip over one non-VFP / NEON instruction.
if (!LastMI->isBarrier() &&
- // On A9, AGU and NEON/FPU are muxed.
- !(TII.getSubtarget().isLikeA9() && LastMI->mayLoadOrStore()) &&
+ !(TII.getSubtarget().hasMuxedUnits() && LastMI->mayLoadOrStore()) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 6e7edbf9fb15..20db3d39bcae 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -29,7 +29,6 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLowering.h"
@@ -44,11 +43,6 @@ DisableShifterOp("disable-shifter-op", cl::Hidden,
cl::desc("Disable isel of shifter-op"),
cl::init(false));
-static cl::opt<bool>
-CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
- cl::desc("Check fp vmla / vmls hazard at isel time"),
- cl::init(true));
-
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -84,12 +78,11 @@ public:
/// getI32Imm - Return a target constant of type i32 with the specified
/// value.
- inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
+ inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
}
- SDNode *Select(SDNode *N) override;
-
+ void Select(SDNode *N) override;
bool hasNoVMLxHazardUse(SDNode *N) const;
bool isShifterOpProfitable(const SDValue &Shift,
@@ -200,57 +193,61 @@ public:
#include "ARMGenDAGISel.inc"
private:
- /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
- /// ARM.
- SDNode *SelectARMIndexedLoad(SDNode *N);
- SDNode *SelectT2IndexedLoad(SDNode *N);
+ /// Indexed (pre/post inc/dec) load matching code for ARM.
+ bool tryARMIndexedLoad(SDNode *N);
+ bool tryT1IndexedLoad(SDNode *N);
+ bool tryT2IndexedLoad(SDNode *N);
/// SelectVLD - Select NEON load intrinsics. NumVecs should be
/// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// loads of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs <= 2, QOpcodes1 is not used.
- SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *DOpcodes,
- const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
+ void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1);
/// SelectVST - Select NEON store intrinsics. NumVecs should
/// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// stores of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs <= 2, QOpcodes1 is not used.
- SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *DOpcodes,
- const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
+ void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1);
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
/// load/store of D registers and Q registers.
- SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
- bool isUpdating, unsigned NumVecs,
- const uint16_t *DOpcodes, const uint16_t *QOpcodes);
+ void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
+ unsigned NumVecs, const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes);
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
/// should be 2, 3 or 4. The opcode array specifies the instructions used
/// for loading D registers. (Q registers are not supported.)
- SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *Opcodes);
+ void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcodes);
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
/// generated to force the table registers to be consecutive.
- SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
+ void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
- /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
- SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
+ /// Try to select SBFX/UBFX instructions for ARM.
+ bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
// Select special operations if node forms integer ABS pattern
- SDNode *SelectABSOp(SDNode *N);
+ bool tryABSOp(SDNode *N);
+
+ bool tryReadRegister(SDNode *N);
+ bool tryWriteRegister(SDNode *N);
- SDNode *SelectReadRegister(SDNode *N);
- SDNode *SelectWriteRegister(SDNode *N);
+ bool tryInlineAsm(SDNode *N);
- SDNode *SelectInlineAsm(SDNode *N);
+ void SelectConcatVector(SDNode *N);
- SDNode *SelectConcatVector(SDNode *N);
+ bool trySMLAWSMULW(SDNode *N);
+
+ void SelectCMP_SWAP(SDNode *N);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
@@ -269,7 +266,7 @@ private:
SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
// Get the alignment operand for a NEON VLD or VST instruction.
- SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
+ SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
bool is64BitVector);
/// Returns the number of instructions required to materialize the given
@@ -426,11 +423,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
if (OptLevel == CodeGenOpt::None)
return true;
- if (!CheckVMLxHazard)
- return true;
-
- if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() &&
- !Subtarget->isCortexA9() && !Subtarget->isSwift())
+ if (!Subtarget->hasVMLxHazards())
return true;
if (!N->hasOneUse())
@@ -484,6 +477,7 @@ unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
if (Subtarget->isThumb()) {
if (Val <= 255) return 1; // MOV
if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+ if (Val <= 510) return 2; // MOV + ADDi8
if (~Val <= 255) return 2; // MOV + MVN
if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
} else {
@@ -548,11 +542,9 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
unsigned PowerOfTwo = 0;
SDValue NewMulConst;
if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
- BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32,
- N.getOperand(0), NewMulConst)
- .getNode()),
- 0);
+ HandleSDNode Handle(N);
replaceDAGValue(N.getOperand(1), NewMulConst);
+ BaseReg = Handle.getValue();
Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
PowerOfTwo),
SDLoc(N), MVT::i32);
@@ -623,6 +615,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else
@@ -803,6 +796,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
}
@@ -1070,6 +1064,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
}
@@ -1190,6 +1185,7 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
return false; // We want to select register offset instead
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else {
@@ -1297,6 +1293,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::TargetConstantPool)
@@ -1468,15 +1465,15 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
//===--------------------------------------------------------------------===//
/// getAL - Returns a ARMCC::AL immediate node.
-static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) {
+static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
-SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
+bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM == ISD::UNINDEXED)
- return nullptr;
+ return false;
EVT LoadedVT = LD->getMemoryVT();
SDValue Offset, AMOpc;
@@ -1530,26 +1527,53 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
- MVT::i32, MVT::Other, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
+ MVT::i32, MVT::Other, Ops));
+ return true;
} else {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
- MVT::i32, MVT::Other, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
+ MVT::i32, MVT::Other, Ops));
+ return true;
}
}
- return nullptr;
+ return false;
+}
+
+bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT LoadedVT = LD->getMemoryVT();
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED || LD->getExtensionType() != ISD::NON_EXTLOAD ||
+ AM != ISD::POST_INC || LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
+ return false;
+
+ auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
+ if (!COffs || COffs->getZExtValue() != 4)
+ return false;
+
+ // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
+ // The encoding of LDM is not how the rest of ISel expects a post-inc load to
+ // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
+ // ISel.
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ ReplaceNode(N, CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, MVT::i32,
+ MVT::Other, Ops));
+ return true;
}
-SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
+bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM == ISD::UNINDEXED)
- return nullptr;
+ return false;
EVT LoadedVT = LD->getMemoryVT();
bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
@@ -1576,7 +1600,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
break;
default:
- return nullptr;
+ return false;
}
Match = true;
}
@@ -1586,11 +1610,12 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
- return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
- MVT::Other, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
+ MVT::Other, Ops));
+ return true;
}
- return nullptr;
+ return false;
}
/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
@@ -1685,7 +1710,7 @@ SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
/// of a NEON VLD or VST instruction. The supported values depend on the
/// number of registers being loaded.
-SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl,
+SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
unsigned NumVecs, bool is64BitVector) {
unsigned NumRegs = NumVecs;
if (!is64BitVector && NumVecs < 3)
@@ -1806,17 +1831,17 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
return Opc; // If not one we handle, return it unchanged.
}
-SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *DOpcodes,
- const uint16_t *QOpcodes0,
- const uint16_t *QOpcodes1) {
+void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
SDLoc dl(N);
SDValue MemAddr, Align;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
- return nullptr;
+ return;
SDValue Chain = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -1922,13 +1947,16 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
- if (NumVecs == 1)
- return VLd;
+ if (NumVecs == 1) {
+ ReplaceNode(N, VLd);
+ return;
+ }
// Extract out the subregisters.
SDValue SuperReg = SDValue(VLd, 0);
- assert(ARM::dsub_7 == ARM::dsub_0+7 &&
- ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
+ ARM::qsub_3 == ARM::qsub_0 + 3,
+ "Unexpected subreg numbering");
unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
@@ -1936,13 +1964,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
- return nullptr;
+ CurDAG->RemoveDeadNode(N);
}
-SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *DOpcodes,
- const uint16_t *QOpcodes0,
- const uint16_t *QOpcodes1) {
+void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
SDLoc dl(N);
@@ -1950,7 +1978,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
unsigned AddrOpIdx = isUpdating ? 1 : 2;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
- return nullptr;
+ return;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -2042,7 +2070,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
// Transfer memoperands.
cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
- return VSt;
+ ReplaceNode(N, VSt);
+ return;
}
// Otherwise, quad registers are stored with two separate instructions,
@@ -2083,13 +2112,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
Ops);
cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
- return VStB;
+ ReplaceNode(N, VStB);
}
-SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
- bool isUpdating, unsigned NumVecs,
- const uint16_t *DOpcodes,
- const uint16_t *QOpcodes) {
+void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
+ unsigned NumVecs,
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
SDLoc dl(N);
@@ -2097,7 +2126,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned AddrOpIdx = isUpdating ? 1 : 2;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
- return nullptr;
+ return;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -2188,13 +2217,16 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
QOpcodes[OpcodeIndex]);
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
- if (!IsLoad)
- return VLdLn;
+ if (!IsLoad) {
+ ReplaceNode(N, VLdLn);
+ return;
+ }
// Extract the subregisters.
SuperReg = SDValue(VLdLn, 0);
- assert(ARM::dsub_7 == ARM::dsub_0+7 &&
- ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
+ ARM::qsub_3 == ARM::qsub_0 + 3,
+ "Unexpected subreg numbering");
unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
@@ -2202,18 +2234,17 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
- return nullptr;
+ CurDAG->RemoveDeadNode(N);
}
-SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
- unsigned NumVecs,
- const uint16_t *Opcodes) {
+void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
SDLoc dl(N);
SDValue MemAddr, Align;
if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
- return nullptr;
+ return;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -2277,7 +2308,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
SuperReg = SDValue(VLdDup, 0);
// Extract the subregisters.
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
unsigned SubIdx = ARM::dsub_0;
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
@@ -2285,11 +2316,11 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
- return nullptr;
+ CurDAG->RemoveDeadNode(N);
}
-SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
- unsigned Opc) {
+void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
+ unsigned Opc) {
assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
SDLoc dl(N);
EVT VT = N->getValueType(0);
@@ -2318,13 +2349,12 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
Ops.push_back(getAL(CurDAG, dl)); // predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
- return CurDAG->getMachineNode(Opc, dl, VT, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}
-SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
- bool isSigned) {
+bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
if (!Subtarget->hasV6T2Ops())
- return nullptr;
+ return false;
unsigned Opc = isSigned
? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
@@ -2338,7 +2368,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
// The immediate is a mask of the low bits iff imm & (imm+1) == 0
if (And_imm & (And_imm + 1))
- return nullptr;
+ return false;
unsigned Srl_imm = 0;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
@@ -2358,7 +2388,8 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, dl, MVT::i32),
getAL(CurDAG, dl), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ return true;
}
// ARM models shift instructions as MOVsi with shifter operand.
@@ -2368,17 +2399,19 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
getAL(CurDAG, dl), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
+ CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
+ return true;
}
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, dl, MVT::i32),
CurDAG->getTargetConstant(Width, dl, MVT::i32),
getAL(CurDAG, dl), Reg0 };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ return true;
}
}
- return nullptr;
+ return false;
}
// Otherwise, we're looking for a shift of a shift
@@ -2392,13 +2425,35 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
unsigned Width = 32 - Srl_imm - 1;
int LSB = Srl_imm - Shl_imm;
if (LSB < 0)
- return nullptr;
+ return false;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, dl, MVT::i32),
CurDAG->getTargetConstant(Width, dl, MVT::i32),
getAL(CurDAG, dl), Reg0 };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ return true;
+ }
+ }
+
+ // Or we are looking for a shift of an and, with a mask operand
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
+ isShiftedMask_32(And_imm)) {
+ unsigned Srl_imm = 0;
+ unsigned LSB = countTrailingZeros(And_imm);
+ // Shift must be the same as the ands lsb
+ if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
+ assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+ unsigned MSB = 31 - countLeadingZeros(And_imm);
+ // Note: The width operand is encoded as width-1.
+ unsigned Width = MSB - LSB;
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
+ CurDAG->getTargetConstant(Width, dl, MVT::i32),
+ getAL(CurDAG, dl), Reg0 };
+ CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ return true;
}
}
@@ -2407,20 +2462,21 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
unsigned LSB = 0;
if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
- return nullptr;
+ return false;
if (LSB + Width > 32)
- return nullptr;
+ return false;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, dl, MVT::i32),
CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
getAL(CurDAG, dl), Reg0 };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ return true;
}
- return nullptr;
+ return false;
}
/// Target-specific DAG combining for ISD::XOR.
@@ -2433,16 +2489,16 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
-SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
+bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
SDValue XORSrc0 = N->getOperand(0);
SDValue XORSrc1 = N->getOperand(1);
EVT VT = N->getValueType(0);
if (Subtarget->isThumb1Only())
- return nullptr;
+ return false;
if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
- return nullptr;
+ return false;
SDValue ADDSrc0 = XORSrc0.getOperand(0);
SDValue ADDSrc1 = XORSrc0.getOperand(1);
@@ -2456,57 +2512,214 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
XType.isInteger() && SRAConstant != nullptr &&
Size == SRAConstant->getZExtValue()) {
unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
- return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
+ CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
+ return true;
+ }
+
+ return false;
+}
+
+static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
+ bool Accumulate) {
+ // For SM*WB, we need to some form of sext.
+ // For SM*WT, we need to search for (sra X, 16)
+ // Src1 then gets set to X.
+ if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
+ SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
+ SignExt.getOpcode() == ISD::AssertSext) &&
+ SignExt.getValueType() == MVT::i32) {
+
+ *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
+ Src1 = SignExt.getOperand(0);
+ return true;
}
- return nullptr;
+ if (SignExt.getOpcode() != ISD::SRA)
+ return false;
+
+ ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
+ if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
+ return false;
+
+ SDValue Op0 = SignExt.getOperand(0);
+
+ // The sign extend operand for SM*WB could be generated by a shl and ashr.
+ if (Op0.getOpcode() == ISD::SHL) {
+ SDValue SHL = Op0;
+ ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
+ if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
+ return false;
+
+ *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
+ Src1 = Op0.getOperand(0);
+ return true;
+ }
+ *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
+ Src1 = SignExt.getOperand(0);
+ return true;
}
-SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
+static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
+ SDValue &Src1, bool Accumulate) {
+ // First we look for:
+ // (add (or (srl ?, 16), (shl ?, 16)))
+ if (OR.getOpcode() != ISD::OR)
+ return false;
+
+ SDValue SRL = OR.getOperand(0);
+ SDValue SHL = OR.getOperand(1);
+
+ if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
+ SRL = OR.getOperand(1);
+ SHL = OR.getOperand(0);
+ if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
+ return false;
+ }
+
+ ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
+ ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
+ if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
+ SHLSrc1->getZExtValue() != 16)
+ return false;
+
+ // The first operands to the shifts need to be the two results from the
+ // same smul_lohi node.
+ if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
+ SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
+ return false;
+
+ SDNode *SMULLOHI = SRL.getOperand(0).getNode();
+ if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
+ SHL.getOperand(0) != SDValue(SMULLOHI, 1))
+ return false;
+
+ // Now we have:
+ // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
+ // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
+ // For SMLAWB the 16-bit value will signed extended somehow.
+ // For SMLAWT only the SRA is required.
+
+ // Check both sides of SMUL_LOHI
+ if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
+ Src0 = SMULLOHI->getOperand(1);
+ } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
+ Accumulate)) {
+ Src0 = SMULLOHI->getOperand(0);
+ } else {
+ return false;
+ }
+ return true;
+}
+
+bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Src0 = N->getOperand(0);
+ SDValue Src1 = N->getOperand(1);
+ SDValue A, B;
+ unsigned Opc = 0;
+
+ if (N->getOpcode() == ISD::ADD) {
+ if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
+ return false;
+
+ SDValue Acc;
+ if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
+ Acc = Src1;
+ } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
+ Acc = Src0;
+ } else {
+ return false;
+ }
+ if (Opc == 0)
+ return false;
+
+ SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32) };
+ CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
+ return true;
+ } else if (N->getOpcode() == ISD::OR &&
+ SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
+ if (Opc == 0)
+ return false;
+
+ SDValue Ops[] = { A, B, getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32)};
+ CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ return true;
+ }
+ return false;
+}
+
+/// We've got special pseudo-instructions for these
+void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
+ unsigned Opcode;
+ EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
+ if (MemTy == MVT::i8)
+ Opcode = ARM::CMP_SWAP_8;
+ else if (MemTy == MVT::i16)
+ Opcode = ARM::CMP_SWAP_16;
+ else if (MemTy == MVT::i32)
+ Opcode = ARM::CMP_SWAP_32;
+ else
+ llvm_unreachable("Unknown AtomicCmpSwap type");
+
+ SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ N->getOperand(0)};
+ SDNode *CmpSwap = CurDAG->getMachineNode(
+ Opcode, SDLoc(N),
+ CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
+ ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
+ CurDAG->RemoveDeadNode(N);
+}
+
+void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
// The only time a CONCAT_VECTORS operation can have legal types is when
// two 64-bit vectors are concatenated to a 128-bit vector.
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() || N->getNumOperands() != 2)
llvm_unreachable("unexpected CONCAT_VECTORS");
- return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
+ ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
}
-SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
+void ARMDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
N->setNodeId(-1);
- return nullptr; // Already selected.
+ return; // Already selected.
}
switch (N->getOpcode()) {
default: break;
- case ISD::WRITE_REGISTER: {
- SDNode *ResNode = SelectWriteRegister(N);
- if (ResNode)
- return ResNode;
+ case ISD::ADD:
+ case ISD::OR:
+ if (trySMLAWSMULW(N))
+ return;
break;
- }
- case ISD::READ_REGISTER: {
- SDNode *ResNode = SelectReadRegister(N);
- if (ResNode)
- return ResNode;
+ case ISD::WRITE_REGISTER:
+ if (tryWriteRegister(N))
+ return;
break;
- }
- case ISD::INLINEASM: {
- SDNode *ResNode = SelectInlineAsm(N);
- if (ResNode)
- return ResNode;
+ case ISD::READ_REGISTER:
+ if (tryReadRegister(N))
+ return;
break;
- }
- case ISD::XOR: {
+ case ISD::INLINEASM:
+ if (tryInlineAsm(N))
+ return;
+ break;
+ case ISD::XOR:
// Select special operations if XOR node forms integer ABS pattern
- SDNode *ResNode = SelectABSOp(N);
- if (ResNode)
- return ResNode;
+ if (tryABSOp(N))
+ return;
// Other cases are autogenerated.
break;
- }
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
// If we can't materialize the constant we need to use a literal pool
@@ -2530,11 +2743,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getRegister(0, MVT::i32),
CurDAG->getEntryNode()
};
- ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
- Ops);
+ ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
+ Ops);
}
- ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
- return nullptr;
+ ReplaceNode(N, ResNode);
+ return;
}
// Other cases are autogenerated.
@@ -2551,25 +2764,27 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MachineFrameInfo *MFI = MF->getFrameInfo();
if (MFI->getObjectAlignment(FI) < 4)
MFI->setObjectAlignment(FI, 4);
- return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
- CurDAG->getTargetConstant(0, dl, MVT::i32));
+ CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, dl, MVT::i32));
+ return;
} else {
unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
ARM::t2ADDri : ARM::ADDri);
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ return;
}
}
case ISD::SRL:
- if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
- return I;
+ if (tryV6T2BitfieldExtractOp(N, false))
+ return;
break;
case ISD::SIGN_EXTEND_INREG:
case ISD::SRA:
- if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
- return I;
+ if (tryV6T2BitfieldExtractOp(N, true))
+ return;
break;
case ISD::MUL:
if (Subtarget->isThumb1Only())
@@ -2587,11 +2802,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
+ CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
+ return;
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
+ CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
+ return;
}
}
if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
@@ -2604,19 +2821,63 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
+ CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
+ return;
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
+ CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
+ return;
}
}
}
break;
case ISD::AND: {
// Check for unsigned bitfield extract
- if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
- return I;
+ if (tryV6T2BitfieldExtractOp(N, false))
+ return;
+
+ // If an immediate is used in an AND node, it is possible that the immediate
+ // can be more optimally materialized when negated. If this is the case we
+ // can negate the immediate and use a BIC instead.
+ auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
+ uint32_t Imm = (uint32_t) N1C->getZExtValue();
+
+ // In Thumb2 mode, an AND can take a 12-bit immediate. If this
+ // immediate can be negated and fit in the immediate operand of
+ // a t2BIC, don't do any manual transform here as this can be
+ // handled by the generic ISel machinery.
+ bool PreferImmediateEncoding =
+ Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
+ if (!PreferImmediateEncoding &&
+ ConstantMaterializationCost(Imm) >
+ ConstantMaterializationCost(~Imm)) {
+ // The current immediate costs more to materialize than a negated
+ // immediate, so negate the immediate and use a BIC.
+ SDValue NewImm =
+ CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
+ // If the new constant didn't exist before, reposition it in the topological
+ // ordering so it is just before N. Otherwise, don't touch its location.
+ if (NewImm->getNodeId() == -1)
+ CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
+
+ if (!Subtarget->hasThumb2()) {
+ SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
+ N->getOperand(0), NewImm, getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32)};
+ ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
+ return;
+ } else {
+ SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32)};
+ ReplaceNode(N,
+ CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
+ return;
+ }
+ }
+ }
// (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
// of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
@@ -2632,7 +2893,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (!Opc)
break;
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ N1C = dyn_cast<ConstantSDNode>(N1);
if (!N1C)
break;
if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
@@ -2649,29 +2910,34 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
dl, MVT::i32);
SDValue Ops[] = { N0.getOperand(0), Imm16,
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(Opc, dl, VT, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
+ return;
}
}
break;
}
case ARMISD::VMOVRRD:
- return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
- N->getOperand(0), getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32));
+ ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
+ N->getOperand(0), getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32)));
+ return;
case ISD::UMUL_LOHI: {
if (Subtarget->isThumb1Only())
break;
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
+ ReplaceNode(
+ N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops));
+ return;
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
- ARM::UMULL : ARM::UMULLv5,
- dl, MVT::i32, MVT::i32, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(
+ Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl,
+ MVT::i32, MVT::i32, Ops));
+ return;
}
}
case ISD::SMUL_LOHI: {
@@ -2680,30 +2946,76 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
+ ReplaceNode(
+ N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops));
+ return;
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
- ARM::SMULL : ARM::SMULLv5,
- dl, MVT::i32, MVT::i32, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(
+ Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl,
+ MVT::i32, MVT::i32, Ops));
+ return;
}
}
+ case ARMISD::UMAAL: {
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3),
+ getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32) };
+ ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
+ return;
+ }
case ARMISD::UMLAL:{
+ // UMAAL is similar to UMLAL but it adds two 32-bit values to the
+ // 64-bit multiplication result.
+ if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC &&
+ N->getOperand(3).getOpcode() == ARMISD::ADDE) {
+
+ SDValue Addc = N->getOperand(2);
+ SDValue Adde = N->getOperand(3);
+
+ if (Adde.getOperand(2).getNode() == Addc.getNode()) {
+
+ ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
+ ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
+
+ if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
+ {
+ // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
+ // RdLo = one operand to be added, lower 32-bits of res
+ // RdHi = other operand to be added, upper 32-bits of res
+ // Rn = first multiply operand
+ // Rm = second multiply operand
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ Addc.getOperand(0), Addc.getOperand(1),
+ getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32) };
+ unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
+ CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
+ return;
+ }
+ }
+ }
+
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32)};
- return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
+ ReplaceNode(
+ N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
+ return;
}else{
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
- ARM::UMLAL : ARM::UMLALv5,
- dl, MVT::i32, MVT::i32, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(
+ Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
+ MVT::i32, MVT::i32, Ops));
+ return;
}
}
case ARMISD::SMLAL:{
@@ -2711,25 +3023,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32)};
- return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
+ ReplaceNode(
+ N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
+ return;
}else{
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
- ARM::SMLAL : ARM::SMLALv5,
- dl, MVT::i32, MVT::i32, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(
+ Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
+ MVT::i32, MVT::i32, Ops));
+ return;
}
}
case ISD::LOAD: {
- SDNode *ResNode = nullptr;
- if (Subtarget->isThumb() && Subtarget->hasThumb2())
- ResNode = SelectT2IndexedLoad(N);
- else
- ResNode = SelectARMIndexedLoad(N);
- if (ResNode)
- return ResNode;
+ if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
+ if (tryT2IndexedLoad(N))
+ return;
+ } else if (Subtarget->isThumb()) {
+ if (tryT1IndexedLoad(N))
+ return;
+ } else if (tryARMIndexedLoad(N))
+ return;
// Other cases are autogenerated.
break;
}
@@ -2770,13 +3086,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
ReplaceUses(SDValue(N, 0),
SDValue(Chain.getNode(), Chain.getResNo()));
- return nullptr;
+ CurDAG->RemoveDeadNode(N);
+ return;
}
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: return nullptr;
+ default: return;
case MVT::v8i8: Opc = ARM::VZIPd8; break;
case MVT::v4i16: Opc = ARM::VZIPd16; break;
case MVT::v2f32:
@@ -2790,13 +3107,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG, dl);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
- return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
+ return;
}
case ARMISD::VUZP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: return nullptr;
+ default: return;
case MVT::v8i8: Opc = ARM::VUZPd8; break;
case MVT::v4i16: Opc = ARM::VUZPd16; break;
case MVT::v2f32:
@@ -2810,13 +3128,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG, dl);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
- return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
+ return;
}
case ARMISD::VTRN: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: return nullptr;
+ default: return;
case MVT::v8i8: Opc = ARM::VTRNd8; break;
case MVT::v4i16: Opc = ARM::VTRNd16; break;
case MVT::v2f32:
@@ -2829,7 +3148,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG, dl);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
- return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
+ return;
}
case ARMISD::BUILD_VECTOR: {
EVT VecVT = N->getValueType(0);
@@ -2837,55 +3157,68 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned NumElts = VecVT.getVectorNumElements();
if (EltVT == MVT::f64) {
assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
- return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
+ ReplaceNode(
+ N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
+ return;
}
assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
- if (NumElts == 2)
- return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
+ if (NumElts == 2) {
+ ReplaceNode(
+ N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
+ return;
+ }
assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
- return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
- N->getOperand(2), N->getOperand(3));
+ ReplaceNode(N,
+ createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3)));
+ return;
}
case ARMISD::VLD2DUP: {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
ARM::VLD2DUPd32 };
- return SelectVLDDup(N, false, 2, Opcodes);
+ SelectVLDDup(N, false, 2, Opcodes);
+ return;
}
case ARMISD::VLD3DUP: {
static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
ARM::VLD3DUPd16Pseudo,
ARM::VLD3DUPd32Pseudo };
- return SelectVLDDup(N, false, 3, Opcodes);
+ SelectVLDDup(N, false, 3, Opcodes);
+ return;
}
case ARMISD::VLD4DUP: {
static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
ARM::VLD4DUPd16Pseudo,
ARM::VLD4DUPd32Pseudo };
- return SelectVLDDup(N, false, 4, Opcodes);
+ SelectVLDDup(N, false, 4, Opcodes);
+ return;
}
case ARMISD::VLD2DUP_UPD: {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
ARM::VLD2DUPd16wb_fixed,
ARM::VLD2DUPd32wb_fixed };
- return SelectVLDDup(N, true, 2, Opcodes);
+ SelectVLDDup(N, true, 2, Opcodes);
+ return;
}
case ARMISD::VLD3DUP_UPD: {
static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
ARM::VLD3DUPd16Pseudo_UPD,
ARM::VLD3DUPd32Pseudo_UPD };
- return SelectVLDDup(N, true, 3, Opcodes);
+ SelectVLDDup(N, true, 3, Opcodes);
+ return;
}
case ARMISD::VLD4DUP_UPD: {
static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
ARM::VLD4DUPd16Pseudo_UPD,
ARM::VLD4DUPd32Pseudo_UPD };
- return SelectVLDDup(N, true, 4, Opcodes);
+ SelectVLDDup(N, true, 4, Opcodes);
+ return;
}
case ARMISD::VLD1_UPD: {
@@ -2897,7 +3230,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD1q16wb_fixed,
ARM::VLD1q32wb_fixed,
ARM::VLD1q64wb_fixed };
- return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
+ SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
+ return;
}
case ARMISD::VLD2_UPD: {
@@ -2908,7 +3242,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
ARM::VLD2q16PseudoWB_fixed,
ARM::VLD2q32PseudoWB_fixed };
- return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ return;
}
case ARMISD::VLD3_UPD: {
@@ -2922,7 +3257,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
ARM::VLD3q16oddPseudo_UPD,
ARM::VLD3q32oddPseudo_UPD };
- return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
}
case ARMISD::VLD4_UPD: {
@@ -2936,7 +3272,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
ARM::VLD4q16oddPseudo_UPD,
ARM::VLD4q32oddPseudo_UPD };
- return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
}
case ARMISD::VLD2LN_UPD: {
@@ -2945,7 +3282,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD2LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
ARM::VLD2LNq32Pseudo_UPD };
- return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
+ SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
+ return;
}
case ARMISD::VLD3LN_UPD: {
@@ -2954,7 +3292,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD3LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
ARM::VLD3LNq32Pseudo_UPD };
- return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
+ SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
+ return;
}
case ARMISD::VLD4LN_UPD: {
@@ -2963,7 +3302,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD4LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
ARM::VLD4LNq32Pseudo_UPD };
- return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
+ SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
+ return;
}
case ARMISD::VST1_UPD: {
@@ -2975,7 +3315,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST1q16wb_fixed,
ARM::VST1q32wb_fixed,
ARM::VST1q64wb_fixed };
- return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
+ SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
+ return;
}
case ARMISD::VST2_UPD: {
@@ -2986,7 +3327,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
ARM::VST2q16PseudoWB_fixed,
ARM::VST2q32PseudoWB_fixed };
- return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ return;
}
case ARMISD::VST3_UPD: {
@@ -3000,7 +3342,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
ARM::VST3q16oddPseudo_UPD,
ARM::VST3q32oddPseudo_UPD };
- return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
}
case ARMISD::VST4_UPD: {
@@ -3014,7 +3357,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
ARM::VST4q16oddPseudo_UPD,
ARM::VST4q32oddPseudo_UPD };
- return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
}
case ARMISD::VST2LN_UPD: {
@@ -3023,7 +3367,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST2LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
ARM::VST2LNq32Pseudo_UPD };
- return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
+ SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
+ return;
}
case ARMISD::VST3LN_UPD: {
@@ -3032,7 +3377,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST3LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
ARM::VST3LNq32Pseudo_UPD };
- return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
+ SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
+ return;
}
case ARMISD::VST4LN_UPD: {
@@ -3041,7 +3387,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST4LNd32Pseudo_UPD };
static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
ARM::VST4LNq32Pseudo_UPD };
- return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
+ SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
+ return;
}
case ISD::INTRINSIC_VOID:
@@ -3051,12 +3398,44 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
default:
break;
+ case Intrinsic::arm_mrrc:
+ case Intrinsic::arm_mrrc2: {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ unsigned Opc;
+
+ if (Subtarget->isThumb())
+ Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
+ else
+ Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
+
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
+ Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
+ Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
+
+      // The mrrc2 instruction in ARM doesn't allow predicates; the top 4 bits of the
+      // encoded instruction will always be '1111'. It is possible in assembly language
+      // to specify AL as a predicate to mrrc2, but it makes no difference to the encoding.
+ if (Opc != ARM::MRRC2) {
+ Ops.push_back(getAL(CurDAG, dl));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ }
+
+ Ops.push_back(Chain);
+
+ // Writes to two registers.
+ const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
+
+ ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
+ return;
+ }
case Intrinsic::arm_ldaexd:
case Intrinsic::arm_ldrexd: {
SDLoc dl(N);
SDValue Chain = N->getOperand(0);
SDValue MemAddr = N->getOperand(2);
- bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
@@ -3072,11 +3451,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ResTys.push_back(MVT::Other);
// Place arguments in the right order.
- SmallVector<SDValue, 7> Ops;
- Ops.push_back(MemAddr);
- Ops.push_back(getAL(CurDAG, dl));
- Ops.push_back(CurDAG->getRegister(0, MVT::i32));
- Ops.push_back(Chain);
+ SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32), Chain};
SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
@@ -3112,7 +3488,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ReplaceUses(SDValue(N, 1), Result);
}
ReplaceUses(SDValue(N, 2), OutChain);
- return nullptr;
+ CurDAG->RemoveDeadNode(N);
+ return;
}
case Intrinsic::arm_stlexd:
case Intrinsic::arm_strexd: {
@@ -3150,7 +3527,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
- return St;
+ ReplaceNode(N, St);
+ return;
}
case Intrinsic::arm_neon_vld1: {
@@ -3158,7 +3536,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD1d32, ARM::VLD1d64 };
static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
ARM::VLD1q32, ARM::VLD1q64};
- return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
+ SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
+ return;
}
case Intrinsic::arm_neon_vld2: {
@@ -3166,7 +3545,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD2d32, ARM::VLD1q64 };
static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
ARM::VLD2q32Pseudo };
- return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
+ SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
+ return;
}
case Intrinsic::arm_neon_vld3: {
@@ -3180,7 +3560,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
ARM::VLD3q16oddPseudo,
ARM::VLD3q32oddPseudo };
- return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
}
case Intrinsic::arm_neon_vld4: {
@@ -3194,7 +3575,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
ARM::VLD4q16oddPseudo,
ARM::VLD4q32oddPseudo };
- return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
}
case Intrinsic::arm_neon_vld2lane: {
@@ -3203,7 +3585,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD2LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
ARM::VLD2LNq32Pseudo };
- return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
+ SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
+ return;
}
case Intrinsic::arm_neon_vld3lane: {
@@ -3212,7 +3595,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD3LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
ARM::VLD3LNq32Pseudo };
- return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
+ SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
+ return;
}
case Intrinsic::arm_neon_vld4lane: {
@@ -3221,7 +3605,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD4LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
ARM::VLD4LNq32Pseudo };
- return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
+ SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
+ return;
}
case Intrinsic::arm_neon_vst1: {
@@ -3229,15 +3614,17 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST1d32, ARM::VST1d64 };
static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
ARM::VST1q32, ARM::VST1q64 };
- return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
+ SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
+ return;
}
case Intrinsic::arm_neon_vst2: {
static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
ARM::VST2d32, ARM::VST1q64 };
- static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
- ARM::VST2q32Pseudo };
- return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
+ static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
+ SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
+ return;
}
case Intrinsic::arm_neon_vst3: {
@@ -3251,7 +3638,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
ARM::VST3q16oddPseudo,
ARM::VST3q32oddPseudo };
- return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
}
case Intrinsic::arm_neon_vst4: {
@@ -3265,7 +3653,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
ARM::VST4q16oddPseudo,
ARM::VST4q32oddPseudo };
- return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
}
case Intrinsic::arm_neon_vst2lane: {
@@ -3274,7 +3663,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST2LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
ARM::VST2LNq32Pseudo };
- return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
+ SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
+ return;
}
case Intrinsic::arm_neon_vst3lane: {
@@ -3283,7 +3673,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST3LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
ARM::VST3LNq32Pseudo };
- return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
+ SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
+ return;
}
case Intrinsic::arm_neon_vst4lane: {
@@ -3292,7 +3683,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST4LNd32Pseudo };
static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
ARM::VST4LNq32Pseudo };
- return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
+ SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
+ return;
}
}
break;
@@ -3305,18 +3697,24 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
case Intrinsic::arm_neon_vtbl2:
- return SelectVTBL(N, false, 2, ARM::VTBL2);
+ SelectVTBL(N, false, 2, ARM::VTBL2);
+ return;
case Intrinsic::arm_neon_vtbl3:
- return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
+ SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
+ return;
case Intrinsic::arm_neon_vtbl4:
- return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
+ SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
+ return;
case Intrinsic::arm_neon_vtbx2:
- return SelectVTBL(N, true, 2, ARM::VTBX2);
+ SelectVTBL(N, true, 2, ARM::VTBX2);
+ return;
case Intrinsic::arm_neon_vtbx3:
- return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
+ SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
+ return;
case Intrinsic::arm_neon_vtbx4:
- return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
+ SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
+ return;
}
break;
}
@@ -3324,13 +3722,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VTBL1: {
SDLoc dl(N);
EVT VT = N->getValueType(0);
- SmallVector<SDValue, 6> Ops;
-
- Ops.push_back(N->getOperand(0));
- Ops.push_back(N->getOperand(1));
- Ops.push_back(getAL(CurDAG, dl)); // Predicate
- Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
- return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
+ SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
+ getAL(CurDAG, dl), // Predicate
+ CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
+ ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
+ return;
}
case ARMISD::VTBL2: {
SDLoc dl(N);
@@ -3341,19 +3737,22 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue V1 = N->getOperand(1);
SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(2));
- Ops.push_back(getAL(CurDAG, dl)); // Predicate
- Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
- return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
+ SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
+ CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
+ ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
+ return;
}
case ISD::CONCAT_VECTORS:
- return SelectConcatVector(N);
+ SelectConcatVector(N);
+ return;
+
+ case ISD::ATOMIC_CMP_SWAP:
+ SelectCMP_SWAP(N);
+ return;
}
- return SelectCode(N);
+ SelectCode(N);
}
// Inspect a register string of the form
@@ -3362,8 +3761,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
// and obtain the integer operands from them, adding these operands to the
// provided vector.
static void getIntOperandsFromRegisterString(StringRef RegString,
- SelectionDAG *CurDAG, SDLoc DL,
- std::vector<SDValue>& Ops) {
+ SelectionDAG *CurDAG,
+ const SDLoc &DL,
+ std::vector<SDValue> &Ops) {
SmallVector<StringRef, 5> Fields;
RegString.split(Fields, ':');
@@ -3444,6 +3844,9 @@ static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
.Case("basepri_max", 0x12)
.Case("faultmask", 0x13)
.Case("control", 0x14)
+ .Case("msplim", 0x0a)
+ .Case("psplim", 0x0b)
+ .Case("sp", 0x18)
.Default(-1);
}
@@ -3473,11 +3876,27 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
return -1;
+ if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
+ Flags = "";
+ SYSmvalue |= 0x80;
+ }
+
+ if (!Subtarget->has8MSecExt() &&
+ (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
+ return -1;
+
+ if (!Subtarget->hasV8MMainlineOps() &&
+ (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
+ SYSmvalue == 0x93))
+ return -1;
+
// If it was a read then we won't be expecting flags and so at this point
// we can return the mask.
if (IsRead) {
- assert (Flags.empty() && "Unexpected flags for reading M class register.");
- return SYSmvalue;
+ if (Flags.empty())
+ return SYSmvalue;
+ else
+ return -1;
}
// We know we are now handling a write so need to get the mask for the flags.
@@ -3563,7 +3982,7 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
-SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
+bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
bool IsThumb2 = Subtarget->isThumb2();
@@ -3592,7 +4011,8 @@ SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
Ops.push_back(getAL(CurDAG, DL));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(N->getOperand(0));
- return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
+ return true;
}
std::string SpecialReg = RegString->getString().lower();
@@ -3602,8 +4022,10 @@ SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
N->getOperand(0) };
- return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
- DL, MVT::i32, MVT::Other, Ops);
+ ReplaceNode(
+ N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
+ DL, MVT::i32, MVT::Other, Ops));
+ return true;
}
// The VFP registers are read by creating SelectionDAG nodes with opcodes
@@ -3623,27 +4045,37 @@ SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
// If an opcode was found then we can lower the read to a VFP instruction.
if (Opcode) {
if (!Subtarget->hasVFP2())
- return nullptr;
+ return false;
if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
- return nullptr;
+ return false;
Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
N->getOperand(0) };
- return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
+ ReplaceNode(N,
+ CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
+ return true;
}
// If the target is M Class then need to validate that the register string
// is an acceptable value, so check that a mask can be constructed from the
// string.
if (Subtarget->isMClass()) {
- int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
+ StringRef Flags = "", Reg = SpecialReg;
+ if (Reg.endswith("_ns")) {
+ Flags = "ns";
+ Reg = Reg.drop_back(3);
+ }
+
+ int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
if (SYSmValue == -1)
- return nullptr;
+ return false;
SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
N->getOperand(0) };
- return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
+ ReplaceNode(
+ N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
+ return true;
}
// Here we know the target is not M Class so we need to check if it is one
@@ -3651,24 +4083,27 @@ SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
N->getOperand(0) };
- return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
- MVT::i32, MVT::Other, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
+ DL, MVT::i32, MVT::Other, Ops));
+ return true;
}
if (SpecialReg == "spsr") {
Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
N->getOperand(0) };
- return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
- DL, MVT::i32, MVT::Other, Ops);
+ ReplaceNode(
+ N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
+ MVT::i32, MVT::Other, Ops));
+ return true;
}
- return nullptr;
+ return false;
}
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes
-SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
+bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
bool IsThumb2 = Subtarget->isThumb2();
@@ -3698,7 +4133,8 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(N->getOperand(0));
- return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
+ return true;
}
std::string SpecialReg = RegString->getString().lower();
@@ -3707,8 +4143,10 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
N->getOperand(0) };
- return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
- DL, MVT::Other, Ops);
+ ReplaceNode(
+ N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
+ DL, MVT::Other, Ops));
+ return true;
}
// The VFP registers are written to by creating SelectionDAG nodes with
@@ -3724,16 +4162,17 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
if (Opcode) {
if (!Subtarget->hasVFP2())
- return nullptr;
+ return false;
Ops = { N->getOperand(2), getAL(CurDAG, DL),
CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
- return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
+ return true;
}
- SmallVector<StringRef, 5> Fields;
- StringRef(SpecialReg).split(Fields, '_', 1, false);
- std::string Reg = Fields[0].str();
- StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
+ std::pair<StringRef, StringRef> Fields;
+ Fields = StringRef(SpecialReg).rsplit('_');
+ std::string Reg = Fields.first.str();
+ StringRef Flags = Fields.second;
// If the target was M Class then need to validate the special register value
// and retrieve the mask for use in the instruction node.
@@ -3745,12 +4184,13 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
}
int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
if (SYSmValue == -1)
- return nullptr;
+ return false;
SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
N->getOperand(2), getAL(CurDAG, DL),
CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
- return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
+ return true;
}
// We then check to see if a valid mask can be constructed for one of the
@@ -3761,14 +4201,15 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
N->getOperand(0) };
- return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
- DL, MVT::Other, Ops);
+ ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
+ DL, MVT::Other, Ops));
+ return true;
}
- return nullptr;
+ return false;
}
-SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
+bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
std::vector<SDValue> AsmNodeOperands;
unsigned Flag, Kind;
bool Changed = false;
@@ -3823,6 +4264,17 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
IsTiedToChangedOp = OpChanged[DefIdx];
+ // Memory operands to inline asm in the SelectionDAG are modeled with two
+ // operands: a constant of value InlineAsm::Kind_Mem followed by the input
+ // operand. If we get here and we have a Kind_Mem, skip the next operand (so
+ // it doesn't get misinterpreted), and continue. We do this here because
+ // it's important to update the OpChanged array correctly before moving on.
+ if (Kind == InlineAsm::Kind_Mem) {
+ SDValue op = N->getOperand(++i);
+ AsmNodeOperands.push_back(op);
+ continue;
+ }
+
if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
&& Kind != InlineAsm::Kind_RegDefEarlyClobber)
continue;
@@ -3912,12 +4364,13 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
if (Glue.getNode())
AsmNodeOperands.push_back(Glue);
if (!Changed)
- return nullptr;
+ return false;
SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
New->setNodeId(-1);
- return New.getNode();
+ ReplaceNode(N, New.getNode());
+ return true;
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 978e99cf511e..d6e7caf98a80 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -65,6 +65,13 @@ ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
+// Disabled for causing self-hosting failures once returned-attribute inference
+// was enabled.
+static cl::opt<bool>
+EnableThisRetForwarding("arm-this-return-forwarding", cl::Hidden,
+ cl::desc("Directly forward this return"),
+ cl::init(false));
+
namespace {
class ARMCCState : public CCState {
public:
@@ -240,7 +247,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Set the correct calling convention for ARMv7k WatchOS. It's just
// AAPCS_VFP for functions as simple as libcalls.
- if (Subtarget->isTargetWatchOS()) {
+ if (Subtarget->isTargetWatchABI()) {
for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
}
@@ -254,7 +261,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// RTLIB
if (Subtarget->isAAPCS_ABI() &&
(Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
- Subtarget->isTargetAndroid())) {
+ Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
@@ -390,10 +397,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
- { RTLIB::SDIV_I32, "__rt_sdiv", CallingConv::ARM_AAPCS_VFP },
- { RTLIB::UDIV_I32, "__rt_udiv", CallingConv::ARM_AAPCS_VFP },
- { RTLIB::SDIV_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS_VFP },
- { RTLIB::UDIV_I64, "__rt_udiv64", CallingConv::ARM_AAPCS_VFP },
};
for (const auto &LC : LibraryCalls) {
@@ -410,17 +413,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
- // The half <-> float conversion functions are always soft-float, but are
- // needed for some targets which use a hard-float calling convention by
- // default.
- if (Subtarget->isAAPCS_ABI()) {
- setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
- } else {
- setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
- setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
- setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
+ // The half <-> float conversion functions are always soft-float on
+ // non-watchos platforms, but are needed for some targets which use a
+ // hard-float calling convention by default.
+ if (!Subtarget->isTargetWatchABI()) {
+ if (Subtarget->isAAPCS_ABI()) {
+ setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
+ } else {
+ setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
+ }
}
// In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
@@ -581,6 +586,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
+ setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
// NEON does not have single instruction CTTZ for vectors.
setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
@@ -712,6 +722,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
}
+ } else {
+ // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
+ setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
}
setOperationAction(ISD::SADDO, MVT::i32, Custom);
@@ -758,10 +772,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
- // These just redirect to CTTZ and CTLZ on ARM.
- setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
-
// @llvm.readcyclecounter requires the Performance Monitors extension.
// Default to the 0 expansion on unsupported platforms.
// FIXME: Technically there are older ARM CPUs that have
@@ -773,19 +783,30 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
- !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
+ bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide()
+ : Subtarget->hasDivideInARMMode();
+ if (!hasDivide) {
// These are expanded into libcalls if the cpu doesn't have HW divider.
setOperationAction(ISD::SDIV, MVT::i32, LibCall);
setOperationAction(ISD::UDIV, MVT::i32, LibCall);
}
+ if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) {
+ setOperationAction(ISD::SDIV, MVT::i32, Custom);
+ setOperationAction(ISD::UDIV, MVT::i32, Custom);
+
+ setOperationAction(ISD::SDIV, MVT::i64, Custom);
+ setOperationAction(ISD::UDIV, MVT::i64, Custom);
+ }
+
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
// Register based DivRem for AEABI (RTABI 4.2)
- if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) {
+ if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
+ Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) {
setOperationAction(ISD::SREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
+ HasStandaloneRem = false;
setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
@@ -807,6 +828,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
} else {
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
@@ -833,21 +856,21 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
- // the default expansion. If we are targeting a single threaded system,
- // then set them all for expand so we can lower them later into their
- // non-atomic form.
- if (TM.Options.ThreadModel == ThreadModel::Single)
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
+ // the default expansion.
+ InsertFencesForAtomic = false;
+ if (Subtarget->hasAnyDataBarrier() &&
+ (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
// ATOMIC_FENCE needs custom lowering; the others should have been expanded
// to ldrex/strex loops already.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ if (!Subtarget->isThumb() || !Subtarget->isMClass())
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
- if (!Subtarget->hasV8Ops()) {
+ if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
// Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
- setInsertFencesForAtomic(true);
+ InsertFencesForAtomic = true;
}
} else {
// If there's anything we can use as a barrier, go through custom lowering
@@ -909,6 +932,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+ // Thumb-1 cannot currently select ARMISD::SUBE.
+ if (!Subtarget->isThumb1Only())
+ setOperationAction(ISD::SETCCE, MVT::i32, Custom);
+
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
@@ -956,7 +983,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
- if (Subtarget->isTargetWatchOS()) {
+ if (Subtarget->isTargetWatchABI()) {
setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
}
@@ -1039,7 +1066,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setMinStackArgumentAlignment(4);
// Prefer likely predicted branches to selects on out-of-order cores.
- PredictableSelectIsExpensive = Subtarget->isLikeA9();
+ PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -1106,7 +1133,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CALL: return "ARMISD::CALL";
case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
- case ARMISD::tCALL: return "ARMISD::tCALL";
case ARMISD::BRCOND: return "ARMISD::BRCOND";
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
@@ -1123,6 +1149,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::SSAT: return "ARMISD::SSAT";
+
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
@@ -1199,6 +1227,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
@@ -1373,7 +1402,10 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
case CallingConv::ARM_APCS:
case CallingConv::GHC:
return CC;
+ case CallingConv::PreserveMost:
+ return CallingConv::PreserveMost;
case CallingConv::ARM_AAPCS_VFP:
+ case CallingConv::Swift:
return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())
@@ -1415,18 +1447,18 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
case CallingConv::GHC:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
+ case CallingConv::PreserveMost:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
}
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
-SDValue
-ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals,
- bool isThisReturn, SDValue ThisVal) const {
+SDValue ARMTargetLowering::LowerCallResult(
+ SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
+ SDValue ThisVal) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -1442,7 +1474,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
- if (i == 0 && isThisReturn) {
+ if (i == 0 && isThisReturn && EnableThisRetForwarding) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
@@ -1506,23 +1538,21 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
}
/// LowerMemOpCallTo - Store the argument to the stack.
-SDValue
-ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
- SDValue StackPtr, SDValue Arg,
- SDLoc dl, SelectionDAG &DAG,
- const CCValAssign &VA,
- ISD::ArgFlagsTy Flags) const {
+SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
+ SDValue Arg, const SDLoc &dl,
+ SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, PtrOff);
return DAG.getStore(
Chain, dl, Arg, PtrOff,
- MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
- false, false, 0);
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
}
-void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
+void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
@@ -1704,7 +1734,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, false,
DAG.InferPtrAlignment(AddArg));
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
@@ -1780,20 +1809,27 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
bool isDirect = false;
- bool isARMFunc = false;
+
+ const TargetMachine &TM = getTargetMachine();
+ const Module *Mod = MF.getFunction()->getParent();
+ const GlobalValue *GV = nullptr;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ GV = G->getGlobal();
+ bool isStub =
+ !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
+
+ bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
bool isLocalARMFunc = false;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
auto PtrVt = getPointerTy(DAG.getDataLayout());
if (Subtarget->genLongCalls()) {
- assert((Subtarget->isTargetWindows() ||
- getTargetMachine().getRelocationModel() == Reloc::Static) &&
- "long-calls with non-static relocation model!");
+ assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
+ "long-calls codegen is not position independent!");
// Handle a global address or an external symbol. If it's not one of
// those, the target's already in a register, so we don't need to do
// anything extra.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
+ if (isa<GlobalAddressSDNode>(Callee)) {
// Create a constant pool entry for the callee address
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
@@ -1804,8 +1840,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
@@ -1819,54 +1854,55 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
- }
- } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- isDirect = true;
- bool isDef = GV->isStrongDefinitionForLinker();
- bool isStub = (!isDef && Subtarget->isTargetMachO()) &&
- getTargetMachine().getRelocationModel() != Reloc::Static;
- isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
- // ARM call to a local ARM function is predicable.
- isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
- // tBX takes a register source operand.
- if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
- assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
- Callee = DAG.getNode(
- ARMISD::WrapperPIC, dl, PtrVt,
- DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
- Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, false, true, 0);
- } else if (Subtarget->isTargetCOFF()) {
- assert(Subtarget->isTargetWindows() &&
- "Windows is the only supported COFF target");
- unsigned TargetFlags = GV->hasDLLImportStorageClass()
- ? ARMII::MO_DLLIMPORT
- : ARMII::MO_NO_FLAG;
- Callee =
- DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags);
- if (GV->hasDLLImportStorageClass())
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ }
+ } else if (isa<GlobalAddressSDNode>(Callee)) {
+ // If we're optimizing for minimum size and the function is called three or
+ // more times in this block, we can improve codesize by calling indirectly
+ // as BLXr has a 16-bit encoding.
+ auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+ auto *BB = CLI.CS->getParent();
+ bool PreferIndirect =
+ Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
+ std::count_if(GV->user_begin(), GV->user_end(), [&BB](const User *U) {
+ return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
+ }) > 2;
+
+ if (!PreferIndirect) {
+ isDirect = true;
+ bool isDef = GV->isStrongDefinitionForLinker();
+
+ // ARM call to a local ARM function is predicable.
+ isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
+ // tBX takes a register source operand.
+ if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
+ Callee = DAG.getNode(
+ ARMISD::WrapperPIC, dl, PtrVt,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
Callee =
- DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
- DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
+ DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, false, false, 0);
- } else {
- // On ELF targets for PIC code, direct calls should go through the PLT
- unsigned OpFlags = 0;
- if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_)
- OpFlags = ARMII::MO_PLT;
- Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags);
+ /* Alignment = */ 0, MachineMemOperand::MOInvariant);
+ } else if (Subtarget->isTargetCOFF()) {
+ assert(Subtarget->isTargetWindows() &&
+ "Windows is the only supported COFF target");
+ unsigned TargetFlags = GV->hasDLLImportStorageClass()
+ ? ARMII::MO_DLLIMPORT
+ : ARMII::MO_NO_FLAG;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
+ TargetFlags);
+ if (GV->hasDLLImportStorageClass())
+ Callee =
+ DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
+ DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
+ } else {
+ Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
+ }
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
- bool isStub = Subtarget->isTargetMachO() &&
- getTargetMachine().getRelocationModel() != Reloc::Static;
- isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
@@ -1878,17 +1914,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
} else {
- unsigned OpFlags = 0;
- // On ELF targets for PIC code, direct calls should go through the PLT
- if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_)
- OpFlags = ARMII::MO_PLT;
- Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags);
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
}
}
@@ -1898,11 +1928,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else
- CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+ CallOpc = ARMISD::CALL;
} else {
if (!isDirect && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
- else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
+ else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
// Emit regular call when code size is the priority
!MF.getFunction()->optForMinSize())
// "mov lr, pc; b _foo" to avoid confusing the RSP
@@ -2042,7 +2072,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
if (!Def)
return false;
if (!Flags.isByVal()) {
- if (!TII->isLoadFromStackSlot(Def, FI))
+ if (!TII->isLoadFromStackSlot(*Def, FI))
return false;
} else {
return false;
@@ -2082,9 +2112,9 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
- const Function *CallerF = DAG.getMachineFunction().getFunction();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF->getCallingConv();
- bool CCMatch = CallerCC == CalleeCC;
assert(Subtarget->supportsTailCall());
@@ -2122,41 +2152,25 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
}
- // If the calling conventions do not match, then we'd better make sure the
- // results are returned in the same way as what the caller expects.
- if (!CCMatch) {
- SmallVector<CCValAssign, 16> RVLocs1;
- ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
- *DAG.getContext(), Call);
- CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
-
- SmallVector<CCValAssign, 16> RVLocs2;
- ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
- *DAG.getContext(), Call);
- CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
-
- if (RVLocs1.size() != RVLocs2.size())
+ // Check that the call results are passed in the same way.
+ LLVMContext &C = *DAG.getContext();
+ if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
+ CCAssignFnForNode(CalleeCC, true, isVarArg),
+ CCAssignFnForNode(CallerCC, true, isVarArg)))
+ return false;
+ // The callee has to preserve all registers the caller needs to preserve.
+ const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
+ if (CalleeCC != CallerCC) {
+ const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
+ if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
- for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
- if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
- return false;
- if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
- return false;
- if (RVLocs1[i].isRegLoc()) {
- if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
- return false;
- } else {
- if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
- return false;
- }
- }
}
// If Caller's vararg or byval argument has been split between registers and
// stack, do not perform tail call, since part of the argument is in caller's
// local frame.
- const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
- getInfo<ARMFunctionInfo>();
+ const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
if (AFI_Caller->getArgRegsSaveSize())
return false;
@@ -2166,13 +2180,10 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext(), Call);
+ ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CalleeCC, false, isVarArg));
if (CCInfo.getNextStackOffset()) {
- MachineFunction &MF = DAG.getMachineFunction();
-
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -2209,6 +2220,10 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
}
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
+ return false;
}
return true;
@@ -2226,7 +2241,7 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
}
static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
- SDLoc DL, SelectionDAG &DAG) {
+ const SDLoc &DL, SelectionDAG &DAG) {
const MachineFunction &MF = DAG.getMachineFunction();
const Function *F = MF.getFunction();
@@ -2259,11 +2274,11 @@ static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
}
SDValue
-ARMTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
+ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const {
+ const SDLoc &dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
@@ -2521,9 +2536,9 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SDLoc DL(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
- if (RelocM == Reloc::Static) {
+ bool IsPositionIndependent = isPositionIndependent();
+ if (!IsPositionIndependent) {
CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
} else {
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
@@ -2534,11 +2549,10 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
- SDValue Result =
- DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- false, false, false, 0);
- if (RelocM == Reloc::Static)
+ SDValue Result = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ if (!IsPositionIndependent)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
@@ -2584,7 +2598,8 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
SDValue FuncTLVGet =
DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, true, true, 4);
+ /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
+ MachineMemOperand::MOInvariant);
Chain = FuncTLVGet.getValue(1);
MachineFunction &F = DAG.getMachineFunction();
@@ -2610,6 +2625,61 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
}
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
+
+ SDValue Chain = DAG.getEntryNode();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDLoc DL(Op);
+
+ // Load the current TEB (thread environment block)
+ SDValue Ops[] = {Chain,
+ DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+ DAG.getConstant(15, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(13, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(2, DL, MVT::i32)};
+ SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
+
+ SDValue TEB = CurrentTEB.getValue(0);
+ Chain = CurrentTEB.getValue(1);
+
+ // Load the ThreadLocalStoragePointer from the TEB
+ // A pointer to the TLS array is located at offset 0x2c from the TEB.
+ SDValue TLSArray =
+ DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
+ TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
+
+ // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
+ // offset into the TLSArray.
+
+ // Load the TLS index from the C runtime
+ SDValue TLSIndex =
+ DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
+ TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
+ TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
+
+ SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
+ DAG.getConstant(2, DL, MVT::i32));
+ SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
+ DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
+ MachinePointerInfo());
+
+ // Get the offset of the start of the .tls section (section base)
+ const auto *GA = cast<GlobalAddressSDNode>(Op);
+ auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
+ SDValue Offset = DAG.getLoad(
+ PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
+ DAG.getTargetConstantPool(CPV, PtrVT, 4)),
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+
+ return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
+}
+
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
@@ -2625,10 +2695,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
- Argument =
- DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- false, false, false, 0);
+ Argument = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), Argument,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
@@ -2645,8 +2714,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
- 0);
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -2680,8 +2748,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
@@ -2689,8 +2756,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
} else {
// local exec model
assert(model == TLSModel::LocalExec);
@@ -2700,8 +2766,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
// The address of the thread local variable is the add of the thread
@@ -2714,6 +2779,9 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->isTargetDarwin())
return LowerGlobalTLSAddressDarwin(Op, DAG);
+ if (Subtarget->isTargetWindows())
+ return LowerGlobalTLSAddressWindows(Op, DAG);
+
// TODO: implement the "local dynamic" model
assert(Subtarget->isTargetELF() && "Only ELF implemented here");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
@@ -2738,9 +2806,9 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- bool UseGOT_PREL =
- !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
+ const TargetMachine &TM = getTargetMachine();
+ if (isPositionIndependent()) {
+ bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -2756,15 +2824,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue Chain = Result.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
if (UseGOT_PREL)
- Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, false, false, 0);
+ Result =
+ DAG.getLoad(PtrVT, dl, Chain, Result,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
@@ -2781,8 +2848,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
return DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
}
@@ -2791,7 +2857,6 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- Reloc::Model RelocM = getTargetMachine().getRelocationModel();
if (Subtarget->useMovt(DAG.getMachineFunction()))
++NumMovwMovt;
@@ -2799,15 +2864,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into multiple nodes
unsigned Wrapper =
- RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
+ isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
- if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ if (Subtarget->isGVIndirectSymbol(GV))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
@@ -2833,8 +2897,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
TargetFlags));
if (GV->hasDLLImportStorageClass())
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
@@ -2873,7 +2936,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
"RBIT intrinsic must have i32 type!");
return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1));
}
- case Intrinsic::arm_thread_pointer: {
+ case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
}
@@ -2882,10 +2945,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
- unsigned PCAdj = (RelocM != Reloc::PIC_)
- ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ bool IsPositionIndependent = isPositionIndependent();
+ unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
ARMCP::CPLSDA, PCAdj);
@@ -2893,10 +2955,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
- if (RelocM == Reloc::PIC_) {
+ if (IsPositionIndependent) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
@@ -2962,7 +3023,8 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
if (Subtarget->isMClass()) {
// Only a full system barrier exists in the M-class architectures.
Domain = ARM_MB::SY;
- } else if (Subtarget->isSwift() && Ord == Release) {
+ } else if (Subtarget->preferISHSTBarriers() &&
+ Ord == AtomicOrdering::Release) {
// Swift happens to implement ISHST barriers in a way that's compatible with
// Release semantics but weaker than ISH so we'd be fools not to use
// it. Beware: other processors probably don't!
@@ -3012,13 +3074,14 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
- MachinePointerInfo(SV), false, false, 0);
+ MachinePointerInfo(SV));
}
-SDValue
-ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
- SDValue &Root, SelectionDAG &DAG,
- SDLoc dl) const {
+SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
+ CCValAssign &NextVA,
+ SDValue &Root,
+ SelectionDAG &DAG,
+ const SDLoc &dl) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -3041,8 +3104,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
ArgValue2 = DAG.getLoad(
MVT::i32, dl, Root, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
- false, false, 0);
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
@@ -3060,13 +3122,11 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
// these values; otherwise, this reassembles a (byval) structure that
// was split between registers and memory.
// Return: The frame index registers were stored into.
-int
-ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
- SDLoc dl, SDValue &Chain,
- const Value *OrigArg,
- unsigned InRegsParamRecordIdx,
- int ArgOffset,
- unsigned ArgSize) const {
+int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+ const SDLoc &dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
+ int ArgOffset, unsigned ArgSize) const {
// Currently, two use-cases possible:
// Case #1. Non-var-args function, and we meet first byval parameter.
// Setup first unallocated register as first byval register;
@@ -3104,9 +3164,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
unsigned VReg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(OrigArg, 4 * i));
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
}
@@ -3117,17 +3176,16 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
}
// Setup stack frame, the va_list pointer will start from.
-void
-ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- SDLoc dl, SDValue &Chain,
- unsigned ArgOffset,
- unsigned TotalArgRegsSaveSize,
- bool ForceMutable) const {
+void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ const SDLoc &dl, SDValue &Chain,
+ unsigned ArgOffset,
+ unsigned TotalArgRegsSaveSize,
+ bool ForceMutable) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Try to store any remaining integer argument regs
- // to their spots on the stack so that they may be loaded by deferencing
+ // to their spots on the stack so that they may be loaded by dereferencing
// the result of va_next.
// If there is no regs to be stored, just point address after last
// argument passed via stack.
@@ -3137,14 +3195,10 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
AFI->setVarArgsFrameIndex(FrameIndex);
}
-SDValue
-ARMTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
+SDValue ARMTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -3226,10 +3280,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isMemLoc()) {
int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgValue2 = DAG.getLoad(
- MVT::f64, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
- false, false, false, 0);
+ ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI));
} else {
ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
@@ -3322,10 +3375,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(DAG.getLoad(
- VA.getValVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
- false, false, false, 0));
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI)));
}
lastInsIndex = index;
}
@@ -3369,10 +3421,9 @@ static bool isFloatingPointZero(SDValue Op) {
/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
-SDValue
-ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMcc, SelectionDAG &DAG,
- SDLoc dl) const {
+SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG,
+ const SDLoc &dl) const {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
if (!isLegalICmpImmediate(C)) {
@@ -3428,9 +3479,8 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
-SDValue
-ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- SDLoc dl) const {
+SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const SDLoc &dl) const {
assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
if (!isFloatingPointZero(RHS))
@@ -3647,7 +3697,7 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
}
}
-SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
+SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
SDValue TrueVal, SDValue ARMcc, SDValue CCR,
SDValue Cmp, SelectionDAG &DAG) const {
if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
@@ -3673,14 +3723,149 @@ SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
}
}
+// True if CC is a signed greater-than (SETGT) or greater-or-equal (SETGE)
+// integer condition code; used below to recognize saturation patterns.
+static bool isGTorGE(ISD::CondCode CC) {
+  return CC == ISD::SETGT || CC == ISD::SETGE;
+}
+
+// True if CC is a signed less-than (SETLT) or less-or-equal (SETLE)
+// integer condition code; counterpart of isGTorGE().
+static bool isLTorLE(ISD::CondCode CC) {
+  return CC == ISD::SETLT || CC == ISD::SETLE;
+}
+
+// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
+// All of these conditions (and their <= and >= counterparts) will do:
+// x < k ? k : x
+// x > k ? x : k
+// k < x ? x : k
+// k > x ? k : x
+//
+// K is the SDValue of the constant k; the caller (isSaturatingConditional)
+// guarantees K aliases either LHS or RHS. The pattern matches exactly when
+// the constant is selected on the branch that the comparison proves to be
+// the smaller side, i.e. the select clamps the variable from below.
+static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
+                            const SDValue TrueVal, const SDValue FalseVal,
+                            const ISD::CondCode CC, const SDValue K) {
+  return (isGTorGE(CC) &&
+          ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
+         (isLTorLE(CC) &&
+          ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
+}
+
+// Similar to isLowerSaturate(), but checks for upper-saturating conditions,
+// e.g. (x > k ? k : x) and (x < k ? x : k) and their <= / >= counterparts:
+// the constant K must be selected exactly when the comparison proves the
+// variable is on the larger side, i.e. the select clamps from above.
+static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
+                            const SDValue TrueVal, const SDValue FalseVal,
+                            const ISD::CondCode CC, const SDValue K) {
+  return (isGTorGE(CC) &&
+          ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
+         (isLTorLE(CC) &&
+          ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
+}
+
+// Check if two chained conditionals could be converted into SSAT.
+//
+// SSAT can replace a set of two conditional selectors that bound a number to an
+// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
+//
+// x < -k ? -k : (x > k ? k : x)
+// x < -k ? -k : (x < k ? x : k)
+// x > -k ? (x > k ? k : x) : -k
+// x < k ? (x < -k ? -k : x) : k
+// etc.
+//
+// It returns true if the conversion can be done, false otherwise.
+// Additionally, the variable is returned in parameter V and the constant in K.
+static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
+                                    uint64_t &K) {
+
+  // Operands of the outer SELECT_CC.
+  SDValue LHS1 = Op.getOperand(0);
+  SDValue RHS1 = Op.getOperand(1);
+  SDValue TrueVal1 = Op.getOperand(2);
+  SDValue FalseVal1 = Op.getOperand(3);
+  ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+  // The inner select must sit on the non-constant arm of the outer select.
+  const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
+  if (Op2.getOpcode() != ISD::SELECT_CC)
+    return false;
+
+  SDValue LHS2 = Op2.getOperand(0);
+  SDValue RHS2 = Op2.getOperand(1);
+  SDValue TrueVal2 = Op2.getOperand(2);
+  SDValue FalseVal2 = Op2.getOperand(3);
+  ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
+
+  // Find out which are the constants and which are the variables
+  // in each conditional
+  SDValue *K1 = isa<ConstantSDNode>(LHS1)
+                    ? &LHS1
+                    : isa<ConstantSDNode>(RHS1) ? &RHS1 : nullptr;
+  SDValue *K2 = isa<ConstantSDNode>(LHS2)
+                    ? &LHS2
+                    : isa<ConstantSDNode>(RHS2) ? &RHS2 : nullptr;
+  SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
+  SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
+  SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
+  SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
+
+  // We must detect cases where the original operations worked with 16- or
+  // 8-bit values. In such case, V2Tmp != V2 because the comparison operations
+  // must work with sign-extended values but the select operations return
+  // the original non-extended value.
+  SDValue V2TmpReg = V2Tmp;
+  if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
+    V2TmpReg = V2Tmp->getOperand(0);
+
+  // Check that the registers and the constants have the correct values
+  // in both conditionals
+  if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
+      V2TmpReg != V2)
+    return false;
+
+  // Figure out which conditional is saturating the lower/upper bound.
+  const SDValue *LowerCheckOp =
+      isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
+          ? &Op
+          : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
+                ? &Op2
+                : nullptr;
+  const SDValue *UpperCheckOp =
+      isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
+          ? &Op
+          : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
+                ? &Op2
+                : nullptr;
+
+  // Both bounds must be present and must come from distinct selects.
+  if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
+    return false;
+
+  // Check that the constant in the lower-bound check is
+  // the opposite of the constant in the upper-bound check
+  // in 1's complement.
+  int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
+  int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
+  int64_t PosVal = std::max(Val1, Val2);
+
+  if (((Val1 > Val2 && UpperCheckOp == &Op) ||
+       (Val1 < Val2 && UpperCheckOp == &Op2)) &&
+      Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
+
+    V = V2;
+    K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
+    return true;
+  }
+
+  return false;
+}
+
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+
EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+
+ // Try to convert two saturating conditional selects into a single SSAT
+ SDValue SatValue;
+ uint64_t SatConstant;
+ if (isSaturatingConditional(Op, SatValue, SatConstant))
+ return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
+ DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
+
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);
- SDLoc dl(Op);
if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
@@ -3781,10 +3966,9 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
return DAG.getConstant(0, SDLoc(Op), MVT::i32);
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
- return DAG.getLoad(MVT::i32, SDLoc(Op),
- Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
- Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->isInvariant(), Ld->getAlignment());
+ return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->getAlignment(),
+ Ld->getMemOperand()->getFlags());
llvm_unreachable("Unknown VFP cmp argument!");
}
@@ -3801,21 +3985,17 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
SDValue Ptr = Ld->getBasePtr();
- RetVal1 = DAG.getLoad(MVT::i32, dl,
- Ld->getChain(), Ptr,
- Ld->getPointerInfo(),
- Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->isInvariant(), Ld->getAlignment());
+ RetVal1 =
+ DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
+ Ld->getAlignment(), Ld->getMemOperand()->getFlags());
EVT PtrType = Ptr.getValueType();
unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
- RetVal2 = DAG.getLoad(MVT::i32, dl,
- Ld->getChain(), NewPtr,
- Ld->getPointerInfo().getWithOffset(4),
- Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->isInvariant(), NewAlign);
+ RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
+ Ld->getPointerInfo().getWithOffset(4), NewAlign,
+ Ld->getMemOperand()->getFlags());
return;
}
@@ -3908,8 +4088,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
if (getTargetMachine().Options.UnsafeFPMath &&
(CC == ISD::SETEQ || CC == ISD::SETOEQ ||
CC == ISD::SETNE || CC == ISD::SETUNE)) {
- SDValue Result = OptimizeVFPBrcond(Op, DAG);
- if (Result.getNode())
+ if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
return Result;
}
@@ -3950,19 +4129,17 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
Addr, Op.getOperand(2), JTI);
}
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ if (isPositionIndependent()) {
Addr =
DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
- false, false, false, 0);
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
} else {
Addr =
DAG.getLoad(PTy, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
- false, false, false, 0);
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
}
@@ -4156,7 +4333,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
- MachinePointerInfo(), false, false, false, 0);
+ MachinePointerInfo());
}
// Return LR, which contains the return address. Mark it an implicit live-in.
@@ -4178,8 +4355,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
- MachinePointerInfo(),
- false, false, false, 0);
+ MachinePointerInfo());
return FrameAddr;
}
@@ -4322,7 +4498,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
/// not support i64 elements, so sometimes the zero vectors will need to be
/// explicitly constructed. Regardless, use a canonical VMOV to create the
/// zero vector.
-static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
+static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert(VT.isVector() && "Expected a vector type");
// The canonical modified immediate encoding of a zero vector is....0!
SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
@@ -4826,12 +5002,36 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
return Result;
}
+// Lower ISD::SETCCE (an integer compare that also consumes an incoming
+// carry/borrow) by emitting a subtract-with-carry (ARMISD::SUBE) and then
+// selecting 1 or 0 from the resulting flags with a conditional move.
+static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) {
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue Carry = Op.getOperand(2);
+  SDValue Cond = Op.getOperand(3);
+  SDLoc DL(Op);
+
+  assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
+
+  // Callers never pass the constant-false carry placeholder here.
+  assert(Carry.getOpcode() != ISD::CARRY_FALSE);
+  // SUBE yields the difference plus an i32 flags result (second value).
+  SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
+  SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
+
+  SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
+  SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
+  SDValue ARMcc = DAG.getConstant(
+      IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
+  // Route the SUBE flags through CPSR so the CMOV below can test them.
+  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
+                                   Cmp.getValue(1), SDValue());
+  return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
+                     CCR, Chain.getValue(1));
+}
+
/// isNEONModifiedImm - Check if the specified splat value corresponds to a
/// valid vector constant for a NEON instruction with a "modified immediate"
/// operand (e.g., VMOV). If so, return the encoded value.
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
- SDLoc dl, EVT &VT, bool is128Bits,
+ const SDLoc &dl, EVT &VT, bool is128Bits,
NEONModImmType type) {
unsigned OpCmode, Imm;
@@ -4979,7 +5179,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
return SDValue();
// Try splatting with a VMOV.f32...
- APFloat FPVal = CFP->getValueAPF();
+ const APFloat &FPVal = CFP->getValueAPF();
int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
if (ImmVal != -1) {
@@ -5421,7 +5621,7 @@ static bool isReverseMask(ArrayRef<int> M, EVT VT) {
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
- const ARMSubtarget *ST, SDLoc dl) {
+ const ARMSubtarget *ST, const SDLoc &dl) {
uint64_t Val;
if (!isa<ConstantSDNode>(N))
return SDValue();
@@ -5502,7 +5702,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDValue Value;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
+ if (V.isUndef())
continue;
if (i > 0)
isOnlyLowElement = false;
@@ -5585,7 +5785,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
- SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
+ SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
Val = LowerBUILD_VECTOR(Val, DAG, ST);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
@@ -5635,7 +5835,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDValue Vec = DAG.getUNDEF(VT);
for (unsigned i = 0 ; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
+ if (V.isUndef())
continue;
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
@@ -5681,7 +5881,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
+ if (V.isUndef())
continue;
else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
// A shuffle can only come from building a vector from various
@@ -5808,7 +6008,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
- if (Entry.getOpcode() == ISD::UNDEF)
+ if (Entry.isUndef())
continue;
auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
@@ -5845,7 +6045,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
ShuffleOps[i] = Sources[i].ShuffleVec;
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
- ShuffleOps[1], &Mask[0]);
+ ShuffleOps[1], Mask);
return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}
@@ -5895,7 +6095,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
- SDLoc dl) {
+ const SDLoc &dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
@@ -5982,12 +6182,12 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
- if (V2.getNode()->getOpcode() == ISD::UNDEF)
+ if (V2.getNode()->isUndef())
return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
+ DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
+ DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
}
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
@@ -6024,7 +6224,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize <= 32) {
- if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
// If this is undef splat, generate it via "just" vdup, if possible.
if (Lane == -1) Lane = 0;
@@ -6040,7 +6240,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
!isa<ConstantSDNode>(V1.getOperand(0))) {
bool IsScalarToVector = true;
for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
- if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+ if (!V1.getOperand(i).isUndef()) {
IsScalarToVector = false;
break;
}
@@ -6067,8 +6267,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (isVREVMask(ShuffleMask, VT, 16))
return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
- if (V2->getOpcode() == ISD::UNDEF &&
- isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
+ if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
DAG.getConstant(Imm, dl, MVT::i32));
}
@@ -6103,8 +6302,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// ->
// concat(VZIP(v1, v2):0, :1)
//
- if (V1->getOpcode() == ISD::CONCAT_VECTORS &&
- V2->getOpcode() == ISD::UNDEF) {
+ if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
SDValue SubV1 = V1->getOperand(0);
SDValue SubV2 = V1->getOperand(1);
EVT SubVT = SubV1.getValueType();
@@ -6175,11 +6373,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
- if (VT == MVT::v8i8) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
- if (NewOp.getNode())
+ if (VT == MVT::v8i8)
+ if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
return NewOp;
- }
return SDValue();
}
@@ -6218,11 +6414,11 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDValue Val = DAG.getUNDEF(MVT::v2f64);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (Op0.getOpcode() != ISD::UNDEF)
+ if (!Op0.isUndef())
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
DAG.getIntPtrConstant(0, dl));
- if (Op1.getOpcode() != ISD::UNDEF)
+ if (!Op1.isUndef())
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
DAG.getIntPtrConstant(1, dl));
@@ -6351,17 +6547,16 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
// The load already has the right type.
if (ExtendedTy == LD->getMemoryVT())
return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
- LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment());
+ LD->getBasePtr(), LD->getPointerInfo(),
+ LD->getAlignment(), LD->getMemOperand()->getFlags());
// We need to create a zextload/sextload. We cannot just create a load
// followed by a zext/zext node because LowerMUL is also run during normal
// operation legalization where we can't create illegal types.
return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
- LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->getMemoryVT(), LD->getAlignment(),
+ LD->getMemOperand()->getFlags());
}
/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
@@ -6387,8 +6582,9 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,
- BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
+ return DAG.getBuildVector(
+ MVT::v2i32, SDLoc(N),
+ {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
}
// Construct a new BUILD_VECTOR with elements truncated to half the size.
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
@@ -6405,8 +6601,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl,
- MVT::getVectorVT(TruncVT, NumElts), Ops);
+ return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
@@ -6506,8 +6701,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
-static SDValue
-LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
+static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
+ SelectionDAG &DAG) {
// TODO: Should this propagate fast-math-flags?
// Convert to float
@@ -6528,8 +6723,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
// float4 result = as_float4(as_int4(xf*recip) + 0xb000);
X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
- Y = DAG.getConstant(0xb000, dl, MVT::i32);
- Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+ Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
// Convert back to short.
@@ -6538,8 +6732,8 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
return X;
}
-static SDValue
-LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
+static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
+ SelectionDAG &DAG) {
// TODO: Should this propagate fast-math-flags?
SDValue N2;
@@ -6567,8 +6761,7 @@ LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
// float4 result = as_float4(as_int4(xf*recip) + 0x89);
N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
- N1 = DAG.getConstant(0x89, dl, MVT::i32);
- N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
// Convert back to integer and return.
@@ -6679,8 +6872,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
// float4 result = as_float4(as_int4(xf*recip) + 2);
N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
- N1 = DAG.getConstant(2, dl, MVT::i32);
- N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N1 = DAG.getConstant(2, dl, MVT::v4i32);
N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
// Convert back to integer and return.
@@ -6766,21 +6958,21 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
- .setCallee(CC, RetTy, Callee, std::move(Args), 0)
+ .setCallee(CC, RetTy, Callee, std::move(Args))
.setDiscardResult(ShouldUseSRet);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
if (!ShouldUseSRet)
return CallResult.first;
- SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
- MachinePointerInfo(), false, false, false, 0);
+ SDValue LoadSin =
+ DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
// Address of cos field.
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
- SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
- MachinePointerInfo(), false, false, false, 0);
+ SDValue LoadCos =
+ DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
@@ -6819,7 +7011,7 @@ SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
CLI.setDebugLoc(dl)
.setChain(Chain)
.setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
- ES, std::move(Args), 0);
+ ES, std::move(Args));
return LowerCallTo(CLI).first;
}
@@ -6867,13 +7059,13 @@ void ARMTargetLowering::ExpandDIV_Windows(
}
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
- // Monotonic load/store is legal for all targets
- if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
- return Op;
+ if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
+ // Acquire/Release load/store is not legal for targets without a dmb or
+ // equivalent available.
+ return SDValue();
- // Acquire/Release load/store is not legal for targets without a
- // dmb or equivalent available.
- return SDValue();
+ // Monotonic load/store is legal for all targets.
+ return Op;
}
static void ReplaceREADCYCLECOUNTER(SDNode *N,
@@ -6899,6 +7091,46 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
Results.push_back(Cycles32.getValue(1));
}
+static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
+ SDLoc dl(V.getNode());
+ SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
+ SDValue VHi = DAG.getAnyExtOrTrunc(
+ DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
+ dl, MVT::i32);
+ SDValue RegClass =
+ DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
+ SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
+ SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
+ const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
+ return SDValue(
+ DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
+}
+
+static void ReplaceCMP_SWAP_64Results(SDNode *N,
+ SmallVectorImpl<SDValue> & Results,
+ SelectionDAG &DAG) {
+ assert(N->getValueType(0) == MVT::i64 &&
+ "AtomicCmpSwap on types less than 64 should be legal");
+ SDValue Ops[] = {N->getOperand(1),
+ createGPRPairNode(DAG, N->getOperand(2)),
+ createGPRPairNode(DAG, N->getOperand(3)),
+ N->getOperand(0)};
+ SDNode *CmpSwap = DAG.getMachineNode(
+ ARM::CMP_SWAP_64, SDLoc(N),
+ DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+
+ Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
+ SDValue(CmpSwap, 0)));
+ Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
+ SDValue(CmpSwap, 0)));
+ Results.push_back(SDValue(CmpSwap, 2));
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@@ -6948,6 +7180,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
+ case ISD::SETCCE: return LowerSETCCE(Op, DAG);
case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
@@ -6956,8 +7189,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
- case ISD::SDIV: return LowerSDIV(Op, DAG);
- case ISD::UDIV: return LowerUDIV(Op, DAG);
+ case ISD::SDIV:
+ if (Subtarget->isTargetWindows())
+ return LowerDIV_Windows(Op, DAG, /* Signed */ true);
+ return LowerSDIV(Op, DAG);
+ case ISD::UDIV:
+ if (Subtarget->isTargetWindows())
+ return LowerDIV_Windows(Op, DAG, /* Signed */ false);
+ return LowerUDIV(Op, DAG);
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
@@ -7005,6 +7244,13 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::UREM:
Res = LowerREM(N, DAG);
break;
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ Res = LowerDivRem(SDValue(N, 0), DAG);
+ assert(Res.getNumOperands() == 2 && "DivRem needs two values");
+ Results.push_back(Res.getValue(0));
+ Results.push_back(Res.getValue(1));
+ return;
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
@@ -7013,6 +7259,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
Results);
+ case ISD::ATOMIC_CMP_SWAP:
+ ReplaceCMP_SWAP_64Results(N, Results, DAG);
+ return;
}
if (Res.getNode())
Results.push_back(Res);
@@ -7024,11 +7273,12 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
-void ARMTargetLowering::
-SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
- MachineBasicBlock *DispatchBB, int FI) const {
+void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB,
+ int FI) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
MachineConstantPool *MCP = MF->getConstantPool();
@@ -7139,10 +7389,10 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
}
}
-void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
+void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
MachineFrameInfo *MFI = MF->getFrameInfo();
@@ -7182,7 +7432,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// Get an ordered list of the machine basic blocks for the jump table.
std::vector<MachineBasicBlock*> LPadList;
- SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
+ SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
LPadList.reserve(CallSiteNumToLPad.size());
for (unsigned I = 1; I <= MaxCSNum; ++I) {
SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
@@ -7200,7 +7450,6 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
MachineJumpTableInfo *JTI =
MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
- Reloc::Model RelocM = getTargetMachine().getRelocationModel();
// Create the MBBs for the dispatch code.
@@ -7244,6 +7493,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// registers being marked as clobbered.
MIB.addRegMask(RI.getNoPreservedMask());
+ bool IsPositionIndependent = isPositionIndependent();
unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
@@ -7357,7 +7607,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
.addMemOperand(JTMMOLd));
unsigned NewVReg6 = NewVReg5;
- if (RelocM == Reloc::PIC_) {
+ if (IsPositionIndependent) {
NewVReg6 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
.addReg(ARM::CPSR, RegState::Define)
@@ -7440,7 +7690,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
.addImm(0)
.addMemOperand(JTMMOLd));
- if (RelocM == Reloc::PIC_) {
+ if (IsPositionIndependent) {
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
.addReg(NewVReg5, RegState::Kill)
.addReg(NewVReg4)
@@ -7524,7 +7774,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
(*I)->setIsEHPad(false);
// The instruction is gone now.
- MI->eraseFromParent();
+ MI.eraseFromParent();
}
static
@@ -7576,8 +7826,8 @@ static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
/// Emit a post-increment load operation with given size. The instructions
/// will be added to BB at Pos.
-static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
- const TargetInstrInfo *TII, DebugLoc dl,
+static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
+ const TargetInstrInfo *TII, const DebugLoc &dl,
unsigned LdSize, unsigned Data, unsigned AddrIn,
unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
@@ -7608,8 +7858,8 @@ static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
/// Emit a post-increment store operation with given size. The instructions
/// will be added to BB at Pos.
-static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
- const TargetInstrInfo *TII, DebugLoc dl,
+static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
+ const TargetInstrInfo *TII, const DebugLoc &dl,
unsigned StSize, unsigned Data, unsigned AddrIn,
unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
@@ -7637,7 +7887,7 @@ static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
}
MachineBasicBlock *
-ARMTargetLowering::EmitStructByval(MachineInstr *MI,
+ARMTargetLowering::EmitStructByval(MachineInstr &MI,
MachineBasicBlock *BB) const {
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
@@ -7646,11 +7896,11 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
- unsigned dest = MI->getOperand(0).getReg();
- unsigned src = MI->getOperand(1).getReg();
- unsigned SizeVal = MI->getOperand(2).getImm();
- unsigned Align = MI->getOperand(3).getImm();
- DebugLoc dl = MI->getDebugLoc();
+ unsigned dest = MI.getOperand(0).getReg();
+ unsigned src = MI.getOperand(1).getReg();
+ unsigned SizeVal = MI.getOperand(2).getImm();
+ unsigned Align = MI.getOperand(3).getImm();
+ DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -7722,7 +7972,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
srcIn = srcOut;
destIn = destOut;
}
- MI->eraseFromParent(); // The instruction is gone now.
+ MI.eraseFromParent(); // The instruction is gone now.
return BB;
}
@@ -7848,7 +8098,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// Add epilogue to handle BytesLeft.
BB = exitMBB;
- MachineInstr *StartOfExit = exitMBB->begin();
+ auto StartOfExit = exitMBB->begin();
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
@@ -7866,16 +8116,16 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
destIn = destOut;
}
- MI->eraseFromParent(); // The instruction is gone now.
+ MI.eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *
-ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
+ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const TargetMachine &TM = getTargetMachine();
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
assert(Subtarget->isTargetWindows() &&
"__chkstk is only supported on Windows");
@@ -7930,24 +8180,26 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
ARM::SP)
- .addReg(ARM::SP).addReg(ARM::R4)));
+ .addReg(ARM::SP, RegState::Kill)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)));
- MI->eraseFromParent();
+ MI.eraseFromParent();
return MBB;
}
MachineBasicBlock *
-ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI,
+ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
- MF->push_back(ContBB);
+ MF->insert(++MBB->getIterator(), ContBB);
ContBB->splice(ContBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
- MBB->addSuccessor(ContBB);
+ ContBB->transferSuccessorsAndUpdatePHIs(MBB);
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
MF->push_back(TrapBB);
@@ -7955,74 +8207,89 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI,
MBB->addSuccessor(TrapBB);
BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ))
- .addReg(MI->getOperand(0).getReg())
+ .addReg(MI.getOperand(0).getReg())
.addMBB(TrapBB);
+ AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::t2B)).addMBB(ContBB));
+ MBB->addSuccessor(ContBB);
- MI->eraseFromParent();
+ MI.eraseFromParent();
return ContBB;
}
MachineBasicBlock *
-ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc dl = MI.getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default: {
- MI->dump();
+ MI.dump();
llvm_unreachable("Unexpected instr type to insert");
}
+
+ // Thumb1 post-indexed loads are really just single-register LDMs.
+ case ARM::tLDR_postidx: {
+ BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
+ .addOperand(MI.getOperand(1)) // Rn_wb
+ .addOperand(MI.getOperand(2)) // Rn
+ .addOperand(MI.getOperand(3)) // PredImm
+ .addOperand(MI.getOperand(4)) // PredReg
+ .addOperand(MI.getOperand(0)); // Rt
+ MI.eraseFromParent();
+ return BB;
+ }
+
// The Thumb2 pre-indexed stores have the same MI operands, they just
// define them differently in the .td files from the isel patterns, so
// they need pseudos.
case ARM::t2STR_preidx:
- MI->setDesc(TII->get(ARM::t2STR_PRE));
+ MI.setDesc(TII->get(ARM::t2STR_PRE));
return BB;
case ARM::t2STRB_preidx:
- MI->setDesc(TII->get(ARM::t2STRB_PRE));
+ MI.setDesc(TII->get(ARM::t2STRB_PRE));
return BB;
case ARM::t2STRH_preidx:
- MI->setDesc(TII->get(ARM::t2STRH_PRE));
+ MI.setDesc(TII->get(ARM::t2STRH_PRE));
return BB;
case ARM::STRi_preidx:
case ARM::STRBi_preidx: {
- unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
- ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
+ unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
+ : ARM::STRB_PRE_IMM;
// Decode the offset.
- unsigned Offset = MI->getOperand(4).getImm();
+ unsigned Offset = MI.getOperand(4).getImm();
bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
Offset = ARM_AM::getAM2Offset(Offset);
if (isSub)
Offset = -Offset;
- MachineMemOperand *MMO = *MI->memoperands_begin();
+ MachineMemOperand *MMO = *MI.memoperands_begin();
BuildMI(*BB, MI, dl, TII->get(NewOpc))
- .addOperand(MI->getOperand(0)) // Rn_wb
- .addOperand(MI->getOperand(1)) // Rt
- .addOperand(MI->getOperand(2)) // Rn
- .addImm(Offset) // offset (skip GPR==zero_reg)
- .addOperand(MI->getOperand(5)) // pred
- .addOperand(MI->getOperand(6))
- .addMemOperand(MMO);
- MI->eraseFromParent();
+ .addOperand(MI.getOperand(0)) // Rn_wb
+ .addOperand(MI.getOperand(1)) // Rt
+ .addOperand(MI.getOperand(2)) // Rn
+ .addImm(Offset) // offset (skip GPR==zero_reg)
+ .addOperand(MI.getOperand(5)) // pred
+ .addOperand(MI.getOperand(6))
+ .addMemOperand(MMO);
+ MI.eraseFromParent();
return BB;
}
case ARM::STRr_preidx:
case ARM::STRBr_preidx:
case ARM::STRH_preidx: {
unsigned NewOpc;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default: llvm_unreachable("unexpected opcode!");
case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
- for (unsigned i = 0; i < MI->getNumOperands(); ++i)
- MIB.addOperand(MI->getOperand(i));
- MI->eraseFromParent();
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i)
+ MIB.addOperand(MI.getOperand(i));
+ MI.eraseFromParent();
return BB;
}
@@ -8055,8 +8322,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
- BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
- .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+ BuildMI(BB, dl, TII->get(ARM::tBcc))
+ .addMBB(sinkMBB)
+ .addImm(MI.getOperand(3).getImm())
+ .addReg(MI.getOperand(4).getReg());
// copy0MBB:
// %FalseValue = ...
@@ -8070,12 +8339,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(*BB, BB->begin(), dl,
- TII->get(ARM::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
+ .addReg(MI.getOperand(1).getReg())
+ .addMBB(copy0MBB)
+ .addReg(MI.getOperand(2).getReg())
+ .addMBB(thisMBB);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -8086,10 +8356,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Compare both parts that make up the double comparison separately for
// equality.
- bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+ bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
- unsigned LHS1 = MI->getOperand(1).getReg();
- unsigned LHS2 = MI->getOperand(2).getReg();
+ unsigned LHS1 = MI.getOperand(1).getReg();
+ unsigned LHS2 = MI.getOperand(2).getReg();
if (RHSisZero) {
AddDefaultPred(BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
@@ -8098,8 +8368,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(LHS2).addImm(0)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
} else {
- unsigned RHS1 = MI->getOperand(3).getReg();
- unsigned RHS2 = MI->getOperand(4).getReg();
+ unsigned RHS1 = MI.getOperand(3).getReg();
+ unsigned RHS2 = MI.getOperand(4).getReg();
AddDefaultPred(BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(LHS1).addReg(RHS1));
@@ -8108,9 +8378,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
}
- MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+ MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
- if (MI->getOperand(0).getImm() == ARMCC::NE)
+ if (MI.getOperand(0).getImm() == ARMCC::NE)
std::swap(destMBB, exitMBB);
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -8120,7 +8390,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
else
BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -8157,9 +8427,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
Fn->insert(BBI, RSBBB);
Fn->insert(BBI, SinkBB);
- unsigned int ABSSrcReg = MI->getOperand(1).getReg();
- unsigned int ABSDstReg = MI->getOperand(0).getReg();
- bool ABSSrcKIll = MI->getOperand(1).isKill();
+ unsigned int ABSSrcReg = MI.getOperand(1).getReg();
+ unsigned int ABSDstReg = MI.getOperand(0).getReg();
+ bool ABSSrcKIll = MI.getOperand(1).isKill();
bool isThumb2 = Subtarget->isThumb2();
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode S must not be specified if source register is the SP or
@@ -8204,7 +8474,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(ABSSrcReg).addMBB(BB);
// remove ABS instruction
- MI->eraseFromParent();
+ MI.eraseFromParent();
// return last added BB
return SinkBB;
@@ -8223,38 +8493,38 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
/// when it is expanded into LDM/STM. This is done as a post-isel lowering
/// instead of as a custom inserter because we need the use list from the SDNode.
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
- MachineInstr *MI, const SDNode *Node) {
+ MachineInstr &MI, const SDNode *Node) {
bool isThumb1 = Subtarget->isThumb1Only();
- DebugLoc DL = MI->getDebugLoc();
- MachineFunction *MF = MI->getParent()->getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MI.getParent()->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineInstrBuilder MIB(*MF, MI);
// If the new dst/src is unused mark it as dead.
if (!Node->hasAnyUseOfValue(0)) {
- MI->getOperand(0).setIsDead(true);
+ MI.getOperand(0).setIsDead(true);
}
if (!Node->hasAnyUseOfValue(1)) {
- MI->getOperand(1).setIsDead(true);
+ MI.getOperand(1).setIsDead(true);
}
// The MEMCPY both defines and kills the scratch registers.
- for (unsigned I = 0; I != MI->getOperand(4).getImm(); ++I) {
+ for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
: &ARM::GPRRegClass);
MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
}
}
-void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
- if (MI->getOpcode() == ARM::MEMCPY) {
+ if (MI.getOpcode() == ARM::MEMCPY) {
attachMEMCPYScratchRegs(Subtarget, MI, Node);
return;
}
- const MCInstrDesc *MCID = &MI->getDesc();
+ const MCInstrDesc *MCID = &MI.getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
// operand is still set to noreg. If needed, set the optional operand's
@@ -8263,24 +8533,24 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
// Rename pseudo opcodes.
- unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
+ unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
if (NewOpc) {
const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
MCID = &TII->get(NewOpc);
- assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
+ assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 &&
"converted opcode should be the same except for cc_out");
- MI->setDesc(*MCID);
+ MI.setDesc(*MCID);
// Add the optional cc_out operand
- MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
+ MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
}
unsigned ccOutIdx = MCID->getNumOperands() - 1;
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
- if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
+ if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
@@ -8288,14 +8558,14 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// since we already have an optional CPSR def.
bool definesCPSR = false;
bool deadCPSR = false;
- for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
- i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
+ ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
definesCPSR = true;
if (MO.isDead())
deadCPSR = true;
- MI->RemoveOperand(i);
+ MI.RemoveOperand(i);
break;
}
}
@@ -8305,14 +8575,14 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
}
assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
if (deadCPSR) {
- assert(!MI->getOperand(ccOutIdx).getReg() &&
+ assert(!MI.getOperand(ccOutIdx).getReg() &&
"expect uninitialized optional cc_out operand");
return;
}
// If this instruction was defined with an optional CPSR def and its dag node
// had a live implicit CPSR def, then activate the optional CPSR def.
- MachineOperand &MO = MI->getOperand(ccOutIdx);
+ MachineOperand &MO = MI.getOperand(ccOutIdx);
MO.setReg(ARM::CPSR);
MO.setIsDef(true);
}
@@ -8442,16 +8712,12 @@ SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N0.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
- if (Result.getNode())
+ if (N0.getNode()->hasOneUse())
+ if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
return Result;
- }
- if (N1.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
- if (Result.getNode())
+ if (N1.getNode()->hasOneUse())
+ if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
return Result;
- }
return SDValue();
}
@@ -8533,7 +8799,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
// Get widened type and narrowed type.
MVT widenType;
unsigned numElem = VT.getVectorNumElements();
-
+
EVT inputLaneType = Vec.getValueType().getVectorElementType();
switch (inputLaneType.getSimpleVT().SimpleTy) {
case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
@@ -8559,11 +8825,6 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
- if (Subtarget->isThumb1Only()) return SDValue();
-
- // Only perform the checks after legalize when the pattern is available.
- if (DCI.isBeforeLegalize()) return SDValue();
-
// Look for multiply add opportunities.
// The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
// each add nodes consumes a value from ISD::UMUL_LOHI and there is
@@ -8691,14 +8952,97 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
return resNode;
}
+static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // UMAAL is similar to UMLAL except that it adds two unsigned values.
+ // While trying to combine for the other MLAL nodes, first search for the
+ // chance to use UMAAL. Check if Addc uses another addc node which can first
+ // be combined into a UMLAL. The other pattern is AddcNode being combined
+ // into an UMLAL and then using another addc is handled in ISelDAGToDAG.
+
+ if (!Subtarget->hasV6Ops())
+ return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+ SDNode *PrevAddc = nullptr;
+ if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC)
+ PrevAddc = AddcNode->getOperand(0).getNode();
+ else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC)
+ PrevAddc = AddcNode->getOperand(1).getNode();
+
+ // If there's no addc chains, just return a search for any MLAL.
+ if (PrevAddc == nullptr)
+ return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+ // Try to convert the addc operand to an MLAL and if that fails try to
+ // combine AddcNode.
+ SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget);
+ if (MLAL != SDValue(PrevAddc, 0))
+ return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+ // Find the converted UMAAL or quit if it doesn't exist.
+ SDNode *UmlalNode = nullptr;
+ SDValue AddHi;
+ if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
+ UmlalNode = AddcNode->getOperand(0).getNode();
+ AddHi = AddcNode->getOperand(1);
+ } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
+ UmlalNode = AddcNode->getOperand(1).getNode();
+ AddHi = AddcNode->getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
+ // the ADDC as well as Zero.
+ auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3));
+
+ if (!Zero || Zero->getZExtValue() != 0)
+ return SDValue();
+
+ // Check that we have a glued ADDC node.
+ if (AddcNode->getValueType(1) != MVT::Glue)
+ return SDValue();
+
+ // Look for the glued ADDE.
+ SDNode* AddeNode = AddcNode->getGluedUser();
+ if (!AddeNode)
+ return SDValue();
+
+ if ((AddeNode->getOperand(0).getNode() == Zero &&
+ AddeNode->getOperand(1).getNode() == UmlalNode) ||
+ (AddeNode->getOperand(0).getNode() == UmlalNode &&
+ AddeNode->getOperand(1).getNode() == Zero)) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
+ UmlalNode->getOperand(2), AddHi };
+ SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
+ DAG.getVTList(MVT::i32, MVT::i32), Ops);
+
+ // Replace the ADDs' nodes uses by the UMAAL node's values.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
+
+ // Return original node to notify the driver to stop replacing.
+ return SDValue(AddcNode, 0);
+ }
+ return SDValue();
+}
+
/// PerformADDCCombine - Target-specific dag combine transform from
-/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
+/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or
+/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
static SDValue PerformADDCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
- return AddCombineTo64bitMLAL(N, DCI, Subtarget);
+ if (Subtarget->isThumb1Only()) return SDValue();
+ // Only perform the checks after legalize when the pattern is available.
+ if (DCI.isBeforeLegalize()) return SDValue();
+
+ return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
}
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
@@ -8710,15 +9054,13 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
const ARMSubtarget *Subtarget){
// Attempt to create vpaddl for this add.
- SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
- if (Result.getNode())
+ if (SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget))
return Result;
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
- if (N0.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
- if (Result.getNode()) return Result;
- }
+ if (N0.getNode()->hasOneUse())
+ if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
+ return Result;
return SDValue();
}
@@ -8731,8 +9073,7 @@ static SDValue PerformADDCombine(SDNode *N,
SDValue N1 = N->getOperand(1);
// First try with the default operand order.
- SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
- if (Result.getNode())
+ if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
return Result;
// If that didn't work, try again with the operands commuted.
@@ -8747,10 +9088,9 @@ static SDValue PerformSUBCombine(SDNode *N,
SDValue N1 = N->getOperand(1);
// fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
- if (N1.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
- if (Result.getNode()) return Result;
- }
+ if (N1.getNode()->hasOneUse())
+ if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
+ return Result;
return SDValue();
}
@@ -8920,8 +9260,7 @@ static SDValue PerformANDCombine(SDNode *N,
if (!Subtarget->isThumb1Only()) {
// fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
- SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
- if (Result.getNode())
+ if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
return Result;
}
@@ -8963,8 +9302,7 @@ static SDValue PerformORCombine(SDNode *N,
if (!Subtarget->isThumb1Only()) {
// fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
- SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
- if (Result.getNode())
+ if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
}
@@ -9137,8 +9475,7 @@ static SDValue PerformXORCombine(SDNode *N,
if (!Subtarget->isThumb1Only()) {
// fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
- SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
- if (Result.getNode())
+ if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
}
@@ -9300,17 +9637,15 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(LD);
SDValue BasePtr = LD->getBasePtr();
- SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
- LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment());
+ SDValue NewLD1 =
+ DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
+ LD->getAlignment(), LD->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
- SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
- LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(),
- std::min(4U, LD->getAlignment() / 2));
+ SDValue NewLD2 = DAG.getLoad(
+ MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
+ std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
if (DCI.DAG.getDataLayout().isBigEndian())
@@ -9364,11 +9699,9 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N,
// into a pair of GPRs, which is fine when the value is used as a scalar,
// but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
SelectionDAG &DAG = DCI.DAG;
- if (N->getNumOperands() == 2) {
- SDValue RV = PerformVMOVDRRCombine(N, DAG);
- if (RV.getNode())
+ if (N->getNumOperands() == 2)
+ if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
return RV;
- }
// Load i64 elements as f64 values so that type legalization does not split
// them up into i32 values.
@@ -9385,7 +9718,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N,
DCI.AddToWorklist(V.getNode());
}
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops);
+ SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
@@ -9434,7 +9767,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
// Assume only bit cast to i32 will go away.
if (Elt->getOperand(0).getValueType() == MVT::i32)
++NumOfBitCastedElts;
- } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt))
+ } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
// Constants are statically casted, thus do not count them as
// relevant operands.
--NumOfRelevantElts;
@@ -9461,7 +9794,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SDLoc dl(N);
for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
SDValue V = N->getOperand(Idx);
- if (V.getOpcode() == ISD::UNDEF)
+ if (V.isUndef())
continue;
if (V.getOpcode() == ISD::BITCAST &&
V->getOperand(0).getValueType() == MVT::i32)
@@ -9529,8 +9862,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
SDValue Concat0Op1 = Op0.getOperand(1);
SDValue Concat1Op1 = Op1.getOperand(1);
- if (Concat0Op1.getOpcode() != ISD::UNDEF ||
- Concat1Op1.getOpcode() != ISD::UNDEF)
+ if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
return SDValue();
// Skip the transformation if any of the types are illegal.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -9557,7 +9889,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
NewMask.push_back(NewElt);
}
return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
- DAG.getUNDEF(VT), NewMask.data());
+ DAG.getUNDEF(VT), NewMask);
}
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
@@ -9953,7 +10285,7 @@ static SDValue PerformSTORECombine(SDNode *N,
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
DAG.getUNDEF(WideVec.getValueType()),
- ShuffleVec.data());
+ ShuffleVec);
// At this point all of the data is stored at the bottom of the
// register. We now need to save it to mem.
@@ -9984,8 +10316,8 @@ static SDValue PerformSTORECombine(SDNode *N,
StoreType, ShuffWide,
DAG.getIntPtrConstant(I, DL));
SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
Increment);
Chains.push_back(Ch);
@@ -10004,18 +10336,18 @@ static SDValue PerformSTORECombine(SDNode *N,
bool isBigEndian = DAG.getDataLayout().isBigEndian();
SDLoc DL(St);
SDValue BasePtr = St->getBasePtr();
- SDValue NewST1 = DAG.getStore(St->getChain(), DL,
- StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
- BasePtr, St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ SDValue NewST1 = DAG.getStore(
+ St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
+ BasePtr, St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
return DAG.getStore(NewST1.getValue(0), DL,
StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
- OffsetPtr, St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(),
- std::min(4U, St->getAlignment() / 2));
+ OffsetPtr, St->getPointerInfo(),
+ std::min(4U, St->getAlignment() / 2),
+ St->getMemOperand()->getFlags());
}
if (StVal.getValueType() == MVT::i64 &&
@@ -10038,9 +10370,8 @@ static SDValue PerformSTORECombine(SDNode *N,
DCI.AddToWorklist(ExtElt.getNode());
DCI.AddToWorklist(V.getNode());
return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment(),
- St->getAAInfo());
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags(), St->getAAInfo());
}
// If this is a legal vector store, try to combine it into a VST1_UPD.
@@ -10066,7 +10397,8 @@ static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue Op = N->getOperand(0);
- if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL)
+ if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
+ Op.getOpcode() != ISD::FMUL)
return SDValue();
SDValue ConstVec = Op->getOperand(1);
@@ -10123,7 +10455,7 @@ static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
SDValue Op = N->getOperand(0);
unsigned OpOpcode = Op.getNode()->getOpcode();
- if (!N->getValueType(0).isVector() ||
+ if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
(OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
return SDValue();
@@ -10464,7 +10796,7 @@ static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
// The operand to BFI is already a mask suitable for removing the bits it
// sets.
ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
- APInt Mask = CI->getAPIntValue();
+ const APInt &Mask = CI->getAPIntValue();
KnownZero &= Mask;
KnownOne &= Mask;
return;
@@ -10522,7 +10854,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
} else {
assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
}
-
+
if (Op1->getOpcode() != ISD::OR)
return SDValue();
@@ -10552,7 +10884,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
SDLoc dl(X);
EVT VT = X.getValueType();
unsigned BitInX = AndC->getAPIntValue().logBase2();
-
+
if (BitInX != 0) {
// We must shift X first.
X = DAG.getNode(ISD::SRL, dl, VT, X,
@@ -10573,6 +10905,46 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
return V;
}
+/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
+SDValue
+ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
+ SDValue Cmp = N->getOperand(4);
+ if (Cmp.getOpcode() != ARMISD::CMPZ)
+ // Only looking at NE cases.
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue LHS = Cmp.getOperand(0);
+ SDValue RHS = Cmp.getOperand(1);
+ SDValue Chain = N->getOperand(0);
+ SDValue BB = N->getOperand(1);
+ SDValue ARMcc = N->getOperand(2);
+ ARMCC::CondCodes CC =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+ // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
+ // -> (brcond Chain BB CC CPSR Cmp)
+ if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
+ LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
+ LHS->getOperand(0)->hasOneUse()) {
+ auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
+ auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
+ auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
+ if ((LHS00C && LHS00C->getZExtValue() == 0) &&
+ (LHS01C && LHS01C->getZExtValue() == 1) &&
+ (LHS1C && LHS1C->getZExtValue() == 1) &&
+ (RHSC && RHSC->getZExtValue() == 0)) {
+ return DAG.getNode(
+ ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
+ LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
+ }
+ }
+
+ return SDValue();
+}
+
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
SDValue
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
@@ -10626,6 +10998,21 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
N->getOperand(3), NewCmp);
}
+ // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
+ // -> (cmov F T CC CPSR Cmp)
+ if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
+ auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
+ auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
+ if ((LHS0C && LHS0C->getZExtValue() == 0) &&
+ (LHS1C && LHS1C->getZExtValue() == 1) &&
+ (RHSC && RHSC->getZExtValue() == 0)) {
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
+ LHS->getOperand(2), LHS->getOperand(3),
+ LHS->getOperand(4));
+ }
+ }
+
if (Res.getNode()) {
APInt KnownZero, KnownOne;
DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
@@ -10676,6 +11063,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+ case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
case ISD::LOAD: return PerformLOADCombine(N, DCI);
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
@@ -11198,22 +11586,37 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
- if (Subtarget->isThumb1Only())
- return false;
-
EVT VT;
SDValue Ptr;
- bool isSEXTLoad = false;
+ bool isSEXTLoad = false, isNonExt;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
+ isNonExt = !ST->isTruncatingStore();
} else
return false;
+ if (Subtarget->isThumb1Only()) {
+ // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
+ // must be non-extending/truncating, i32, with an offset of 4.
+ assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
+ if (Op->getOpcode() != ISD::ADD || !isNonExt)
+ return false;
+ auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!RHS || RHS->getZExtValue() != 4)
+ return false;
+
+ Offset = Op->getOperand(1);
+ Base = Op->getOperand(0);
+ AM = ISD::POST_INC;
+ return true;
+ }
+
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
@@ -11322,6 +11725,26 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
return false;
}
+const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
+ // At this point, we have to lower this constraint to something else, so we
+ // lower it to an "r" or "w". However, by doing this we will force the result
+ // to be in register, while the X constraint is much more permissive.
+ //
+ // Although we are correct (we are free to emit anything, without
+ // constraints), we might break use cases that would expect us to be more
+ // efficient and emit something else.
+ if (!Subtarget->hasVFP2())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "w";
+ if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
+ (ConstraintVT.getSizeInBits() == 64 ||
+ ConstraintVT.getSizeInBits() == 128))
+ return "w";
+
+ return "r";
+}
+
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
@@ -11640,7 +12063,8 @@ static TargetLowering::ArgListTy getDivRemArgList(
}
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
- assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) &&
+ assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
+ Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) &&
"Register-based DivRem lowering only");
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
@@ -11664,7 +12088,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
@@ -11702,7 +12126,7 @@ SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
// Lower call
CallLoweringInfo CLI(DAG);
CLI.setChain(InChain)
- .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args), 0)
+ .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
.setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
@@ -11950,23 +12374,20 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
AtomicOrdering Ord, bool IsStore,
bool IsLoad) const {
- if (!getInsertFencesForAtomic())
- return nullptr;
-
switch (Ord) {
- case NotAtomic:
- case Unordered:
+ case AtomicOrdering::NotAtomic:
+ case AtomicOrdering::Unordered:
llvm_unreachable("Invalid fence: unordered/non-atomic");
- case Monotonic:
- case Acquire:
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::Acquire:
return nullptr; // Nothing to do
- case SequentiallyConsistent:
+ case AtomicOrdering::SequentiallyConsistent:
if (!IsStore)
return nullptr; // Nothing to do
/*FALLTHROUGH*/
- case Release:
- case AcquireRelease:
- if (Subtarget->isSwift())
+ case AtomicOrdering::Release:
+ case AtomicOrdering::AcquireRelease:
+ if (Subtarget->preferISHSTBarriers())
return makeDMB(Builder, ARM_MB::ISHST);
// FIXME: add a comment with a link to documentation justifying this.
else
@@ -11978,19 +12399,16 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
AtomicOrdering Ord, bool IsStore,
bool IsLoad) const {
- if (!getInsertFencesForAtomic())
- return nullptr;
-
switch (Ord) {
- case NotAtomic:
- case Unordered:
+ case AtomicOrdering::NotAtomic:
+ case AtomicOrdering::Unordered:
llvm_unreachable("Invalid fence: unordered/not-atomic");
- case Monotonic:
- case Release:
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::Release:
return nullptr; // Nothing to do
- case Acquire:
- case AcquireRelease:
- case SequentiallyConsistent:
+ case AtomicOrdering::Acquire:
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
return makeDMB(Builder, ARM_MB::ISH);
}
llvm_unreachable("Unknown fence ordering in emitTrailingFence");
@@ -12031,7 +12449,17 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
- return true;
+ // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+ // implement cmpxchg without spilling. If the address being exchanged is also
+ // on the stack and close enough to the spill slot, this can lead to a
+ // situation where the monitor always gets cleared and the atomic operation
+ // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
+ return getTargetMachine().getOptLevel() != 0;
+}
+
+bool ARMTargetLowering::shouldInsertFencesForAtomic(
+ const Instruction *I) const {
+ return InsertFencesForAtomic;
}
// This has so far only been implemented for MachO.
@@ -12080,7 +12508,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
- bool IsAcquire = isAtLeastAcquire(Ord);
+ bool IsAcquire = isAcquireOrStronger(Ord);
// Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
// intrinsic must return {i32, i32} and we have to recombine them into a
@@ -12124,7 +12552,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- bool IsRelease = isAtLeastRelease(Ord);
+ bool IsRelease = isReleaseOrStronger(Ord);
// Since the intrinsics must have legal type, the i64 intrinsics take two
// parameters: "i32, i32". We must marshal Val into the appropriate form
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 96b56c3ec330..4906686616bc 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -43,7 +43,6 @@ namespace llvm {
CALL, // Function call.
CALL_PRED, // Function call that's predicable.
CALL_NOLINK, // Function call with branch not branch-and-link.
- tCALL, // Thumb function call.
BRCOND, // Conditional branch.
BR_JT, // Jumptable branch.
BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
@@ -61,6 +60,8 @@ namespace llvm {
CMOV, // ARM conditional move instructions.
+ SSAT, // Signed saturation
+
BCC_i64,
SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
@@ -164,6 +165,7 @@ namespace llvm {
UMLAL, // 64bit Unsigned Accumulate Multiply
SMLAL, // 64bit Signed Accumulate Multiply
+ UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
@@ -251,13 +253,14 @@ namespace llvm {
EVT VT) const override;
MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const override;
+ EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *MBB) const override;
- void AdjustInstrPostInstrSelection(MachineInstr *MI,
+ void AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const override;
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
+ SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const;
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
@@ -335,6 +338,8 @@ namespace llvm {
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
+ const char *LowerXConstraint(EVT ConstraintVT) const override;
+
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
/// true it means one of the asm constraint of the inline asm instruction
@@ -453,6 +458,7 @@ namespace llvm {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
+ bool shouldInsertFencesForAtomic(const Instruction *I) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
@@ -468,6 +474,14 @@ namespace llvm {
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ bool supportSwiftError() const override {
+ return true;
+ }
+
+ bool hasStandaloneRem(EVT VT) const override {
+ return HasStandaloneRem;
+ }
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -486,29 +500,34 @@ namespace llvm {
///
unsigned ARMPCLabelIndex;
+ // TODO: remove this, and have shouldInsertFencesForAtomic do the proper
+ // check.
+ bool InsertFencesForAtomic;
+
+ bool HasStandaloneRem = true;
+
void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const;
typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
- void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
- SDValue Chain, SDValue &Arg,
- RegsToPassVector &RegsToPass,
+ void PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain,
+ SDValue &Arg, RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
SDValue &StackPtr,
SmallVectorImpl<SDValue> &MemOpChains,
ISD::ArgFlagsTy Flags) const;
SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
- SDLoc dl) const;
+ const SDLoc &dl) const;
CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC,
bool isVarArg) const;
CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
bool isVarArg) const;
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
- SDLoc dl, SelectionDAG &DAG,
+ const SDLoc &dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
@@ -527,6 +546,7 @@ namespace llvm {
SelectionDAG &DAG,
TLSModel::Model model) const;
SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
@@ -576,9 +596,9 @@ namespace llvm {
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals,
- bool isThisReturn, SDValue ThisVal) const;
+ const SDLoc &dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
+ SDValue ThisVal) const;
bool supportSplitCSR(MachineFunction *MF) const override {
return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
@@ -590,23 +610,19 @@ namespace llvm {
const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
SDValue
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const override;
-
- int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
- SDLoc dl, SDValue &Chain,
- const Value *OrigArg,
- unsigned InRegsParamRecordIdx,
- int ArgOffset,
+ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SDLoc &dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &dl,
+ SDValue &Chain, const Value *OrigArg,
+ unsigned InRegsParamRecordIdx, int ArgOffset,
unsigned ArgSize) const;
void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- SDLoc dl, SDValue &Chain,
- unsigned ArgOffset,
- unsigned TotalArgRegsSaveSize,
+ const SDLoc &dl, SDValue &Chain,
+ unsigned ArgOffset, unsigned TotalArgRegsSaveSize,
bool ForceMutable = false) const;
SDValue
@@ -634,42 +650,39 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const override;
- SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const override;
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SDLoc &dl, SelectionDAG &DAG) const override;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
- SDValue getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
+ SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue CCR, SDValue Cmp,
SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const;
- SDValue getVFPCmp(SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, SDLoc dl) const;
+ SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
+ SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
+ const SDLoc &dl) const;
SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
- void SetupEntryBlockForSjLj(MachineInstr *MI,
- MachineBasicBlock *MBB,
+ void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const;
- void EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const;
+ void EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const;
- bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
+ bool RemapAddSubWithFlags(MachineInstr &MI, MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *EmitStructByval(MachineInstr &MI,
MachineBasicBlock *MBB) const;
- MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
+ MachineBasicBlock *EmitLowered__chkstk(MachineInstr &MI,
MachineBasicBlock *MBB) const;
- MachineBasicBlock *EmitLowered__dbzchk(MachineInstr *MI,
+ MachineBasicBlock *EmitLowered__dbzchk(MachineInstr &MI,
MachineBasicBlock *MBB) const;
};
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index e79608d360ca..37a83f70a1fb 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -246,23 +246,33 @@ def shr_imm64 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 64; }]> {
let ParserMatchClass = shr_imm64_asm_operand;
}
+
+// ARM Assembler operand for ldr Rd, =expression which generates an offset
+// to a constant pool entry or a MOV depending on the value of expression
+def const_pool_asm_operand : AsmOperandClass { let Name = "ConstPoolAsmImm"; }
+def const_pool_asm_imm : Operand<i32> {
+ let ParserMatchClass = const_pool_asm_operand;
+}
+
+
//===----------------------------------------------------------------------===//
// ARM Assembler alias templates.
//
-class ARMInstAlias<string Asm, dag Result, bit Emit = 0b1>
- : InstAlias<Asm, Result, Emit>, Requires<[IsARM]>;
-class tInstAlias<string Asm, dag Result, bit Emit = 0b1>
- : InstAlias<Asm, Result, Emit>, Requires<[IsThumb]>;
-class t2InstAlias<string Asm, dag Result, bit Emit = 0b1>
- : InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>;
-class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
- : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
-class VFP2DPInstAlias<string Asm, dag Result, bit Emit = 0b1>
- : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2,HasDPVFP]>;
-class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
- : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
-class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
- : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>;
+// Note: When EmitPriority == 1, the alias will be used for printing
+class ARMInstAlias<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>, Requires<[IsARM]>;
+class tInstAlias<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb]>;
+class t2InstAlias<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb2]>;
+class VFP2InstAlias<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP2]>;
+class VFP2DPInstAlias<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP2,HasDPVFP]>;
+class VFP3InstAlias<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP3]>;
+class NEONInstAlias<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>, Requires<[HasNEON]>;
class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
@@ -563,12 +573,12 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
class AIldaex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: AIldr_ex_or_acq<opcod, 0b10, oops, iops, itin, opc, asm, pattern>,
- Requires<[IsARM, HasV8]>;
+ Requires<[IsARM, HasAcquireRelease, HasV7Clrex]>;
class AIstlex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: AIstr_ex_or_rel<opcod, 0b10, oops, iops, itin, opc, asm, pattern>,
- Requires<[IsARM, HasV8]> {
+ Requires<[IsARM, HasAcquireRelease, HasV7Clrex]> {
bits<4> Rd;
let Inst{15-12} = Rd;
}
@@ -593,12 +603,12 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
class AIldracq<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: AIldr_ex_or_acq<opcod, 0b00, oops, iops, itin, opc, asm, pattern>,
- Requires<[IsARM, HasV8]>;
+ Requires<[IsARM, HasAcquireRelease]>;
class AIstrrel<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: AIstr_ex_or_rel<opcod, 0b00, oops, iops, itin, opc, asm, pattern>,
- Requires<[IsARM, HasV8]> {
+ Requires<[IsARM, HasAcquireRelease]> {
let Inst{15-12} = 0b1111;
}
@@ -1379,11 +1389,6 @@ class T2Ipostldst<bit signed, bits<2> opcod, bit load, bit pre,
let DecoderMethod = "DecodeT2LdStPre";
}
-// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
-class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
- list<Predicate> Predicates = [IsThumb, IsThumb1Only, HasV5T];
-}
-
// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode.
class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
@@ -1495,6 +1500,32 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
let D = VFPNeonDomain;
}
+class AHI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
+ list<Predicate> Predicates = [HasFullFP16];
+
+ // Instruction operands.
+ bits<5> Sd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Sd{0};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Sd{4-1};
+ let Inst{7-0} = addr{7-0}; // imm8
+
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-8} = 0b1001; // Half precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
+
// VFP Load / store multiple pseudo instructions.
class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
list<dag> pattern>
@@ -1817,6 +1848,114 @@ class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{22} = Sd{0};
}
+// Half precision, unary, predicated
+class AHuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ list<Predicate> Predicates = [HasFullFP16];
+
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-8} = 0b1001; // Half precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Half precision, unary, non-predicated
+class AHuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
+ bit opcod5, dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone,
+ VFPUnaryFrm, itin, asm, "", pattern> {
+ list<Predicate> Predicates = [HasFullFP16];
+
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ let Inst{31-28} = 0b1111;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{19-16} = opcod3;
+ let Inst{11-8} = 0b1001; // Half precision
+ let Inst{7-6} = opcod4;
+ let Inst{4} = opcod5;
+}
+
+// Half precision, binary
+class AHbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ list<Predicate> Predicates = [HasFullFP16];
+
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-8} = 0b1001; // Half precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+}
+
+// Half precision, binary, not predicated
+class AHbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
+ InstrItinClass itin, string asm, list<dag> pattern>
+ : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone,
+ VFPBinaryFrm, itin, asm, "", pattern> {
+ list<Predicate> Predicates = [HasFullFP16];
+
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ let Inst{31-28} = 0b1111;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-8} = 0b1001; // Half precision
+ let Inst{6} = opcod3;
+ let Inst{4} = 0;
+}
+
// VFP conversion instructions
class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
dag oops, dag iops, InstrItinClass itin, string opc, string asm,
@@ -2321,22 +2460,25 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
}
// VFP/NEON Instruction aliases for type suffices.
-class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> :
- InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>;
+// Note: When EmitPriority == 1, the alias will be used for printing
+class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result, bit EmitPriority = 0> :
+ InstAlias<!strconcat(opc, dt, "\t", asm), Result, EmitPriority>, Requires<[HasVFP2]>;
-multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> {
- def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
- def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
- def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
- def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+// Note: When EmitPriority == 1, the alias will be used for printing
+multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result, bit EmitPriority = 0> {
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result, EmitPriority>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result, EmitPriority>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result, EmitPriority>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result, EmitPriority>;
}
-multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result> {
+// Note: When EmitPriority == 1, the alias will be used for printing
+multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result, bit EmitPriority = 0> {
let Predicates = [HasNEON] in {
- def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
- def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
- def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
- def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result, EmitPriority>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result, EmitPriority>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result, EmitPriority>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result, EmitPriority>;
}
}
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index cf973d68085f..98b1b4ca4272 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -90,29 +90,29 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
- Reloc::Model RM) const {
+void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
MachineFunction &MF = *MI->getParent()->getParent();
const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
+ const TargetMachine &TM = MF.getTarget();
if (!Subtarget.useMovt(MF)) {
- if (RM == Reloc::PIC_)
- expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12, RM);
+ if (TM.isPositionIndependent())
+ expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
else
- expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12, RM);
+ expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12);
return;
}
- if (RM != Reloc::PIC_) {
- expandLoadStackGuardBase(MI, ARM::MOVi32imm, ARM::LDRi12, RM);
+ if (!TM.isPositionIndependent()) {
+ expandLoadStackGuardBase(MI, ARM::MOVi32imm, ARM::LDRi12);
return;
}
const GlobalValue *GV =
cast<GlobalValue>((*MI->memoperands_begin())->getValue());
- if (!Subtarget.GVIsIndirectSymbol(GV, RM)) {
- expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12, RM);
+ if (!Subtarget.isGVIndirectSymbol(GV)) {
+ expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12);
return;
}
@@ -123,9 +123,9 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg)
.addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
- unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
- MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
MIB.addMemOperand(MMO);
MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg);
MIB.addReg(Reg, RegState::Kill).addImm(0);
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 90f34ea08401..4b1b7097b18d 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -39,8 +39,7 @@ public:
const ARMRegisterInfo &getRegisterInfo() const override { return RI; }
private:
- void expandLoadStackGuard(MachineBasicBlock::iterator MI,
- Reloc::Model RM) const override;
+ void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
};
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c446ba3109e4..060376b0a273 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -90,12 +90,6 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
SDTCisVT<1, i32>,
SDTCisVT<4, i32>]>;
-def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>,
- SDTCisVT<2, i32>, SDTCisVT<3, i32>,
- SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >;
-def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>;
-def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>;
-
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
@@ -128,6 +122,8 @@ def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
+def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
+
def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
@@ -201,6 +197,12 @@ def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
def HasV6M : Predicate<"Subtarget->hasV6MOps()">,
AssemblerPredicate<"HasV6MOps",
"armv6m or armv6t2">;
+def HasV8MBaseline : Predicate<"Subtarget->hasV8MBaselineOps()">,
+ AssemblerPredicate<"HasV8MBaselineOps",
+ "armv8m.base">;
+def HasV8MMainline : Predicate<"Subtarget->hasV8MMainlineOps()">,
+ AssemblerPredicate<"HasV8MMainlineOps",
+ "armv8m.main">;
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
@@ -235,6 +237,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
+def HasRAS : Predicate<"Subtarget->hasRAS()">,
+ AssemblerPredicate<"FeatureRAS", "ras">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float conversions">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
@@ -251,6 +255,12 @@ def HasDSP : Predicate<"Subtarget->hasDSP()">,
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
AssemblerPredicate<"FeatureDB",
"data-barriers">;
+def HasV7Clrex : Predicate<"Subtarget->hasV7Clrex()">,
+ AssemblerPredicate<"FeatureV7Clrex",
+ "v7 clrex">;
+def HasAcquireRelease : Predicate<"Subtarget->hasAcquireRelease()">,
+ AssemblerPredicate<"FeatureAcquireRelease",
+ "acquire/release">;
def HasMP : Predicate<"Subtarget->hasMPExtension()">,
AssemblerPredicate<"FeatureMP",
"mp-extensions">;
@@ -260,6 +270,9 @@ def HasVirtualization: Predicate<"false">,
def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">,
AssemblerPredicate<"FeatureTrustZone",
"TrustZone">;
+def Has8MSecExt : Predicate<"Subtarget->has8MSecExt()">,
+ AssemblerPredicate<"Feature8MSecExt",
+ "ARMv8-M Security Extensions">;
def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
@@ -279,6 +292,8 @@ def IsARM : Predicate<"!Subtarget->isThumb()">,
def IsMachO : Predicate<"Subtarget->isTargetMachO()">;
def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
+def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
+def IsNotWindows : Predicate<"!Subtarget->isTargetWindows()">;
def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
@@ -301,19 +316,16 @@ def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion =="
" Subtarget->hasVFP4()) || "
"Subtarget->isTargetDarwin()">;
-// VGETLNi32 is microcoded on Swift - prefer VMOV.
-def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">;
-def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">;
+def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
+def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
-// VDUP.32 is microcoded on Swift - prefer VMOV.
-def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">;
-def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
+def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">;
+def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">;
-// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as
-// this allows more effective execution domain optimization. See
-// setExecutionDomain().
-def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
-def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
+def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||"
+ "!Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
+ "Subtarget->useNEONForSinglePrecisionFP()">;
def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
@@ -360,8 +372,6 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
}], hi16>;
-class BinOpWithFlagFrag<dag res> :
- PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>;
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
@@ -408,34 +418,35 @@ def brtarget : Operand<OtherVT> {
let DecoderMethod = "DecodeT2BROperand";
}
-// FIXME: get rid of this one?
-def uncondbrtarget : Operand<OtherVT> {
- let EncoderMethod = "getUnconditionalBranchTargetOpValue";
- let OperandType = "OPERAND_PCREL";
+// Branches targeting ARM-mode must be divisible by 4 if they're a raw
+// immediate.
+def ARMBranchTarget : AsmOperandClass {
+ let Name = "ARMBranchTarget";
}
-// Branch target for ARM. Handles conditional/unconditional
-def br_target : Operand<OtherVT> {
- let EncoderMethod = "getARMBranchTargetOpValue";
- let OperandType = "OPERAND_PCREL";
+// Branches targeting Thumb-mode must be divisible by 2 if they're a raw
+// immediate.
+def ThumbBranchTarget : AsmOperandClass {
+ let Name = "ThumbBranchTarget";
}
-// Call target.
-// FIXME: rename bltarget to t2_bl_target?
-def bltarget : Operand<i32> {
- // Encoded the same as branch targets.
- let EncoderMethod = "getBranchTargetOpValue";
+def arm_br_target : Operand<OtherVT> {
+ let ParserMatchClass = ARMBranchTarget;
+ let EncoderMethod = "getARMBranchTargetOpValue";
let OperandType = "OPERAND_PCREL";
}
// Call target for ARM. Handles conditional/unconditional
// FIXME: rename bl_target to t2_bltarget?
-def bl_target : Operand<i32> {
+def arm_bl_target : Operand<i32> {
+ let ParserMatchClass = ARMBranchTarget;
let EncoderMethod = "getARMBLTargetOpValue";
let OperandType = "OPERAND_PCREL";
}
-def blx_target : Operand<i32> {
+// Target for BLX *from* ARM mode.
+def arm_blx_target : Operand<i32> {
+ let ParserMatchClass = ThumbBranchTarget;
let EncoderMethod = "getARMBLXTargetOpValue";
let OperandType = "OPERAND_PCREL";
}
@@ -981,6 +992,21 @@ def addrmode5_pre : AddrMode5 {
let PrintMethod = "printAddrMode5Operand<true>";
}
+// addrmode5fp16 := reg +/- imm8*2
+//
+def AddrMode5FP16AsmOperand : AsmOperandClass { let Name = "AddrMode5FP16"; }
+class AddrMode5FP16 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5FP16", []> {
+ let EncoderMethod = "getAddrMode5FP16OpValue";
+ let DecoderMethod = "DecodeAddrMode5FP16Operand";
+ let ParserMatchClass = AddrMode5FP16AsmOperand;
+ let MIOperandInfo = (ops GPR:$base, i32imm);
+}
+
+def addrmode5fp16 : AddrMode5FP16 {
+ let PrintMethod = "printAddrMode5FP16Operand<false>";
+}
+
// addrmode6 := reg with optional alignment
//
def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
@@ -1224,7 +1250,7 @@ include "ARMInstrFormats.td"
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
+ SDPatternOperator opnode, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -1297,7 +1323,7 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
+ SDNode opnode, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -1369,7 +1395,7 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
let hasPostISelHook = 1, Defs = [CPSR] in {
multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
- InstrItinClass iis, PatFrag opnode,
+ InstrItinClass iis, SDNode opnode,
bit Commutable = 0> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p),
4, iii,
@@ -1402,7 +1428,7 @@ multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
/// operands are reversed.
let hasPostISelHook = 1, Defs = [CPSR] in {
multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
- InstrItinClass iis, PatFrag opnode,
+ InstrItinClass iis, SDNode opnode,
bit Commutable = 0> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p),
4, iii,
@@ -1431,8 +1457,8 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
let isCompare = 1, Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0,
- string rrDecoderMethod = ""> {
+ SDPatternOperator opnode, bit Commutable = 0,
+ string rrDecoderMethod = ""> {
def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii,
opc, "\t$Rn, $imm",
[(opnode GPR:$Rn, mod_imm:$imm)]>,
@@ -1561,7 +1587,7 @@ class AI_exta_rrot_np<bits<8> opcod, string opc>
/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
let TwoOperandAliasConstraint = "$Rn = $Rd" in
-multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, SDNode opnode,
bit Commutable = 0> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm),
@@ -1632,7 +1658,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
/// AI1_rsc_irs - Define instructions and patterns for rsc
let TwoOperandAliasConstraint = "$Rn = $Rd" in
-multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
+multiclass AI1_rsc_irs<bits<4> opcod, string opc, SDNode opnode> {
let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
@@ -1880,6 +1906,7 @@ def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary,
bits<8> imm;
let Inst{27-8} = 0b00110010000011110000;
let Inst{7-0} = imm;
+ let DecoderMethod = "DecodeHINTInstruction";
}
def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6K]>;
@@ -1888,6 +1915,7 @@ def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6K]>;
def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6K]>;
def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6K]>;
def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>;
+def : InstAlias<"esb$p", (HINT 16, pred:$p)>, Requires<[IsARM, HasRAS]>;
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
"\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
@@ -1915,7 +1943,7 @@ def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
let Inst{7-4} = 0b0111;
}
// default immediate for breakpoint mnemonic
-def : InstAlias<"bkpt", (BKPT 0)>, Requires<[IsARM]>;
+def : InstAlias<"bkpt", (BKPT 0), 0>, Requires<[IsARM]>;
def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
"hlt", "\t$val", []>, Requires<[IsARM, HasV8]> {
@@ -2181,7 +2209,7 @@ let isCall = 1,
// at least be a pseudo instruction expanding to the predicated version
// at MC lowering time.
Defs = [LR], Uses = [SP] in {
- def BL : ABXI<0b1011, (outs), (ins bl_target:$func),
+ def BL : ABXI<0b1011, (outs), (ins arm_bl_target:$func),
IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsARM]>, Sched<[WriteBrL]> {
@@ -2191,7 +2219,7 @@ let isCall = 1,
let DecoderMethod = "DecodeBranchImmInstruction";
}
- def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func),
+ def BL_pred : ABI<0b1011, (outs), (ins arm_bl_target:$func),
IIC_Br, "bl", "\t$func",
[(ARMcall_pred tglobaladdr:$func)]>,
Requires<[IsARM]>, Sched<[WriteBrL]> {
@@ -2232,7 +2260,7 @@ let isCall = 1,
// mov lr, pc; b if callee is marked noreturn to avoid confusing the
// return stack predictor.
- def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func),
+ def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins arm_bl_target:$func),
8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
Requires<[IsARM]>, Sched<[WriteBr]>;
}
@@ -2240,7 +2268,7 @@ let isCall = 1,
let isBranch = 1, isTerminator = 1 in {
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
// a two-value operand where a dag node expects two operands. :(
- def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
+ def Bcc : ABI<0b1010, (outs), (ins arm_br_target:$target),
IIC_Br, "b", "\t$target",
[/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>,
Sched<[WriteBr]> {
@@ -2255,8 +2283,9 @@ let isBranch = 1, isTerminator = 1 in {
// FIXME: We shouldn't need this pseudo at all. Just using Bcc directly
// should be sufficient.
// FIXME: Is B really a Barrier? That doesn't seem right.
- def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br,
- [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>,
+ def B : ARMPseudoExpand<(outs), (ins arm_br_target:$target), 4, IIC_Br,
+ [(br bb:$target)], (Bcc arm_br_target:$target,
+ (ops 14, zero_reg))>,
Sched<[WriteBr]>;
let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
@@ -2283,7 +2312,7 @@ let isBranch = 1, isTerminator = 1 in {
}
// BLX (immediate)
-def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary,
+def BLXi : AXI<(outs), (ins arm_blx_target:$target), BrMiscFrm, NoItinerary,
"blx\t$target", []>,
Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> {
let Inst{31-25} = 0b1111101;
@@ -2313,9 +2342,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>,
Sched<[WriteBr]>;
- def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst),
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins arm_br_target:$dst),
4, IIC_Br, [],
- (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ (Bcc arm_br_target:$dst, (ops 14, zero_reg))>,
Requires<[IsARM]>, Sched<[WriteBr]>;
def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst),
@@ -2467,14 +2496,12 @@ def ERET : ABI<0b0001, (outs), (ins), NoItinerary, "eret", "", []>,
// Load
-defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
- UnOpFrag<(load node:$Src)>>;
+defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si, load>;
defm LDRB : AI_ldr1nopc<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
- UnOpFrag<(zextloadi8 node:$Src)>>;
-defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
- BinOpFrag<(store node:$LHS, node:$RHS)>>;
+ zextloadi8>;
+defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si, store>;
defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
- BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+ truncstorei8>;
// Special LDR for loads from non-pc-relative constpools.
let canFoldAsLoad = 1, mayLoad = 1, hasSideEffects = 0,
@@ -2764,6 +2791,12 @@ def LDRBT_POST
: ARMAsmPseudo<"ldrbt${q} $Rt, $addr", (ins addr_offset_none:$addr, pred:$q),
(outs GPR:$Rt)>;
+// Pseudo instruction ldr Rt, =immediate
+def LDRConstPool
+ : ARMAsmPseudo<"ldr${q} $Rt, $immediate",
+ (ins const_pool_asm_imm:$immediate, pred:$q),
+ (outs GPR:$Rt)>;
+
// Store
// Stores with truncate
@@ -3299,8 +3332,8 @@ def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm),
}
def : InstAlias<"mov${p} $Rd, $imm",
- (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p)>,
- Requires<[IsARM]>;
+ (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p), 0>,
+ Requires<[IsARM, HasV6T2]>;
def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
@@ -3439,11 +3472,9 @@ def UBFX : I<(outs GPRnopc:$Rd),
//
defm ADD : AsI1_bin_irs<0b0100, "add",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr, add, 1>;
defm SUB : AsI1_bin_irs<0b0010, "sub",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr, sub>;
// ADD and SUB with 's' bit set.
//
@@ -3455,27 +3486,21 @@ defm SUB : AsI1_bin_irs<0b0010, "sub",
// FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen
// support for an optional CPSR definition that corresponds to the DAG
// node's second value. We can then eliminate the implicit def of CPSR.
-defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
-defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMaddc, 1>;
+defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>;
-defm ADC : AI1_adde_sube_irs<0b0101, "adc",
- BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
-defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
- BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
+defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>;
+defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>;
defm RSB : AsI1_rbin_irs<0b0011, "rsb",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+ sub>;
// FIXME: Eliminate them if we can write def : Pat patterns which defines
// CPSR and the implicit def of CPSR is not needed.
-defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>;
-defm RSC : AI1_rsc_irs<0b0111, "rsc",
- BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
+defm RSC : AI1_rsc_irs<0b0111, "rsc", ARMsube>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
@@ -3685,20 +3710,19 @@ def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos),
(SSAT imm1_32:$pos, GPRnopc:$a, 0)>;
def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos),
(USAT imm0_31:$pos, GPRnopc:$a, 0)>;
+def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
+ (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
//
defm AND : AsI1_bin_irs<0b0000, "and",
- IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr, and, 1>;
defm ORR : AsI1_bin_irs<0b1100, "orr",
- IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr, or, 1>;
defm EOR : AsI1_bin_irs<0b0001, "eor",
- IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr, xor, 1>;
defm BIC : AsI1_bin_irs<0b1110, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
@@ -3923,9 +3947,10 @@ def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]> {
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rm;
@@ -3989,28 +4014,28 @@ def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsARM, HasV6]>;
-multiclass AI_smul<string opc, PatFrag opnode> {
+multiclass AI_smul<string opc> {
def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
(sext_inreg GPR:$Rm, i16)))]>,
Requires<[IsARM, HasV5TE]>;
def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
(sra GPR:$Rm, (i32 16))))]>,
Requires<[IsARM, HasV5TE]>;
def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
(sext_inreg GPR:$Rm, i16)))]>,
Requires<[IsARM, HasV5TE]>;
def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
(sra GPR:$Rm, (i32 16))))]>,
Requires<[IsARM, HasV5TE]>;
@@ -4026,13 +4051,13 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
-multiclass AI_smla<string opc, PatFrag opnode> {
+multiclass AI_smla<string opc> {
let DecoderMethod = "DecodeSMLAInstruction" in {
def BB : AMulxyIa<0b0001000, 0b00, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd, (add GPR:$Ra,
- (opnode (sext_inreg GPRnopc:$Rn, i16),
+ (mul (sext_inreg GPRnopc:$Rn, i16),
(sext_inreg GPRnopc:$Rm, i16))))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>;
@@ -4040,7 +4065,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd,
- (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16),
+ (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16),
(sra GPRnopc:$Rm, (i32 16)))))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>;
@@ -4048,7 +4073,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd,
- (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
+ (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
(sext_inreg GPRnopc:$Rm, i16))))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>;
@@ -4056,7 +4081,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd,
- (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)),
+ (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
(sra GPRnopc:$Rm, (i32 16)))))]>,
Requires<[IsARM, HasV5TE, UseMulOps]>;
@@ -4074,8 +4099,8 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
}
-defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
-defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMUL : AI_smul<"smul">;
+defm SMLA : AI_smla<"smla">;
// Halfword multiply accumulate long: SMLAL<x><y>.
def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
@@ -4336,8 +4361,7 @@ def SETPAN : AInoP<(outs), (ins imm0_1:$imm), MiscFrm, NoItinerary, "setpan",
//
defm CMP : AI1_cmp_irs<0b1010, "cmp",
- IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
- BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr, ARMcmp>;
// ARMcmpZ can re-use the above instruction definitions.
def : ARMPat<(ARMcmpZ GPR:$src, mod_imm:$imm),
@@ -4745,7 +4769,7 @@ def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
class acquiring_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return isAtLeastAcquire(Ordering);
+ return isAcquireOrStronger(Ordering);
}]>;
def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
@@ -4755,7 +4779,7 @@ def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
class releasing_store<PatFrag base>
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return isAtLeastRelease(Ordering);
+ return isReleaseOrStronger(Ordering);
}]>;
def atomic_store_release_8 : releasing_store<atomic_store_8>;
@@ -4831,21 +4855,21 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
}
class ACI<dag oops, dag iops, string opc, string asm,
- IndexMode im = IndexModeNone>
+ list<dag> pattern, IndexMode im = IndexModeNone>
: I<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
- opc, asm, "", []> {
+ opc, asm, "", pattern> {
let Inst{27-25} = 0b110;
}
class ACInoP<dag oops, dag iops, string opc, string asm,
- IndexMode im = IndexModeNone>
+ list<dag> pattern, IndexMode im = IndexModeNone>
: InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
- opc, asm, "", []> {
+ opc, asm, "", pattern> {
let Inst{31-28} = 0b1111;
let Inst{27-25} = 0b110;
}
-multiclass LdStCop<bit load, bit Dbit, string asm> {
+multiclass LdStCop<bit load, bit Dbit, string asm, list<dag> pattern> {
def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
- asm, "\t$cop, $CRd, $addr"> {
+ asm, "\t$cop, $CRd, $addr", pattern> {
bits<13> addr;
bits<4> cop;
bits<4> CRd;
@@ -4861,7 +4885,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> {
let DecoderMethod = "DecodeCopMemInstruction";
}
def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
- asm, "\t$cop, $CRd, $addr!", IndexModePre> {
+ asm, "\t$cop, $CRd, $addr!", [], IndexModePre> {
bits<13> addr;
bits<4> cop;
bits<4> CRd;
@@ -4878,7 +4902,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> {
}
def _POST: ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
postidx_imm8s4:$offset),
- asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> {
+ asm, "\t$cop, $CRd, $addr, $offset", [], IndexModePost> {
bits<9> offset;
bits<4> addr;
bits<4> cop;
@@ -4897,7 +4921,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> {
def _OPTION : ACI<(outs),
(ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
coproc_option_imm:$option),
- asm, "\t$cop, $CRd, $addr, $option"> {
+ asm, "\t$cop, $CRd, $addr, $option", []> {
bits<8> option;
bits<4> addr;
bits<4> cop;
@@ -4914,9 +4938,9 @@ multiclass LdStCop<bit load, bit Dbit, string asm> {
let DecoderMethod = "DecodeCopMemInstruction";
}
}
-multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
+multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> {
def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
- asm, "\t$cop, $CRd, $addr"> {
+ asm, "\t$cop, $CRd, $addr", pattern> {
bits<13> addr;
bits<4> cop;
bits<4> CRd;
@@ -4932,7 +4956,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
let DecoderMethod = "DecodeCopMemInstruction";
}
def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
- asm, "\t$cop, $CRd, $addr!", IndexModePre> {
+ asm, "\t$cop, $CRd, $addr!", [], IndexModePre> {
bits<13> addr;
bits<4> cop;
bits<4> CRd;
@@ -4949,7 +4973,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
}
def _POST: ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
postidx_imm8s4:$offset),
- asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> {
+ asm, "\t$cop, $CRd, $addr, $offset", [], IndexModePost> {
bits<9> offset;
bits<4> addr;
bits<4> cop;
@@ -4968,7 +4992,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
def _OPTION : ACInoP<(outs),
(ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
coproc_option_imm:$option),
- asm, "\t$cop, $CRd, $addr, $option"> {
+ asm, "\t$cop, $CRd, $addr, $option", []> {
bits<8> option;
bits<4> addr;
bits<4> cop;
@@ -4986,14 +5010,15 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
}
}
-defm LDC : LdStCop <1, 0, "ldc">;
-defm LDCL : LdStCop <1, 1, "ldcl">;
-defm STC : LdStCop <0, 0, "stc">;
-defm STCL : LdStCop <0, 1, "stcl">;
-defm LDC2 : LdSt2Cop<1, 0, "ldc2">, Requires<[PreV8]>;
-defm LDC2L : LdSt2Cop<1, 1, "ldc2l">, Requires<[PreV8]>;
-defm STC2 : LdSt2Cop<0, 0, "stc2">, Requires<[PreV8]>;
-defm STC2L : LdSt2Cop<0, 1, "stc2l">, Requires<[PreV8]>;
+defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
+defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
+
+defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
+defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
+defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
+defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
//===----------------------------------------------------------------------===//
// Move between coprocessor and ARM core register.
@@ -5118,9 +5143,9 @@ def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */,
(outs GPRnopc:$Rt, GPRnopc:$Rt2),
(ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>;
-class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
- : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
- GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary,
+class MovRRCopro2<string opc, bit direction, dag oops, dag iops,
+ list<dag> pattern = []>
+ : ABXI<0b1100, oops, iops, NoItinerary,
!strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>,
Requires<[PreV8]> {
let Inst{31-28} = 0b1111;
@@ -5139,13 +5164,18 @@ class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
let Inst{7-4} = opc1;
let Inst{3-0} = CRm;
- let DecoderMethod = "DecodeMRRC2";
+ let DecoderMethod = "DecoderForMRRC2AndMCRR2";
}
def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
+ (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
+ GPRnopc:$Rt2, c_imm:$CRm),
[(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, imm:$CRm)]>;
-def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
+
+def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */,
+ (outs GPRnopc:$Rt, GPRnopc:$Rt2),
+ (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>;
//===----------------------------------------------------------------------===//
// Move between special register and ARM core register
@@ -5164,7 +5194,7 @@ def MRS : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary,
let Unpredictable{11-0} = 0b110100001111;
}
-def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>,
+def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p), 0>,
Requires<[IsARM]>;
// The MRSsys instruction is the MRS instruction from the ARM ARM,
@@ -5206,6 +5236,7 @@ def MRSbanked : ABI<0b0001, (outs GPRnopc:$Rd), (ins banked_reg:$banked),
// to distinguish between them. The mask operand contains the special register
// (R Bit) in bit 4 and bits 3-0 contains the mask with the fields to be
// accessed in the special register.
+let Defs = [CPSR] in
def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
"msr", "\t$mask, $Rn", []> {
bits<5> mask;
@@ -5220,6 +5251,7 @@ def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
let Inst{3-0} = Rn;
}
+let Defs = [CPSR] in
def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, mod_imm:$imm), NoItinerary,
"msr", "\t$mask, $imm", []> {
bits<5> mask;
@@ -5268,8 +5300,8 @@ let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK,
[SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
let usesCustomInserter = 1, Defs = [CPSR] in
- def WIN__DBZCHK : PseudoInst<(outs), (ins GPR:$divisor), NoItinerary,
- [(win__dbzchk GPR:$divisor)]>;
+ def WIN__DBZCHK : PseudoInst<(outs), (ins tGPR:$divisor), NoItinerary,
+ [(win__dbzchk tGPR:$divisor)]>;
//===----------------------------------------------------------------------===//
// TLS Instructions
@@ -5423,6 +5455,8 @@ def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)),
def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapper texternalsym :$dst), (MOVi32imm texternalsym :$dst)>,
+ Requires<[IsARM, UseMovt]>;
def : ARMPat<(ARMWrapperJT tjumptable:$dst),
(LEApcrelJT tjumptable:$dst)>;
@@ -5568,9 +5602,9 @@ include "ARMInstrNEON.td"
//
// Memory barriers
-def : InstAlias<"dmb", (DMB 0xf)>, Requires<[IsARM, HasDB]>;
-def : InstAlias<"dsb", (DSB 0xf)>, Requires<[IsARM, HasDB]>;
-def : InstAlias<"isb", (ISB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"dmb", (DMB 0xf), 0>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"dsb", (DSB 0xf), 0>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"isb", (ISB 0xf), 0>, Requires<[IsARM, HasDB]>;
// System instructions
def : MnemonicAlias<"swi", "svc">;
@@ -5583,13 +5617,13 @@ def : MnemonicAlias<"stmfd", "stmdb">;
def : MnemonicAlias<"stmia", "stm">;
def : MnemonicAlias<"stmea", "stm">;
-// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the
-// shift amount is zero (i.e., unspecified).
+// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT with the
+// input operands swapped when the shift amount is zero (i.e., unspecified).
def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
- (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>,
+ (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p), 0>,
Requires<[IsARM, HasV6]>;
def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
- (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>,
+ (PKHBT GPRnopc:$Rd, GPRnopc:$Rm, GPRnopc:$Rn, 0, pred:$p), 0>,
Requires<[IsARM, HasV6]>;
// PUSH/POP aliases for STM/LDM
@@ -5747,23 +5781,23 @@ def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
// the instruction definitions need difference constraints pre-v6.
// Use these aliases for the assembly parsing on pre-v6.
def : InstAlias<"mul${s}${p} $Rd, $Rn, $Rm",
- (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
+ (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s), 0>,
Requires<[IsARM, NoV6]>;
def : InstAlias<"mla${s}${p} $Rd, $Rn, $Rm, $Ra",
(MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra,
- pred:$p, cc_out:$s)>,
+ pred:$p, cc_out:$s), 0>,
Requires<[IsARM, NoV6]>;
def : InstAlias<"smlal${s}${p} $RdLo, $RdHi, $Rn, $Rm",
- (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>,
Requires<[IsARM, NoV6]>;
def : InstAlias<"umlal${s}${p} $RdLo, $RdHi, $Rn, $Rm",
- (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>,
Requires<[IsARM, NoV6]>;
def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
- (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>,
Requires<[IsARM, NoV6]>;
def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
- (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>,
Requires<[IsARM, NoV6]>;
// 'it' blocks in ARM mode just validate the predicates. The IT itself
@@ -5775,3 +5809,36 @@ let mayLoad = 1, mayStore =1, hasSideEffects = 1 in
def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
NoItinerary,
[(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
+
+//===----------------------------------
+// Atomic cmpxchg for -O0
+//===----------------------------------
+
+// The fast register allocator used during -O0 inserts spills to cover any VRegs
+// live across basic block boundaries. When this happens between an LDREX and an
+// STREX it can clear the exclusive monitor, causing all cmpxchg attempts to
+// fail.
+
+// Unfortunately, this means we have to have an alternative (expanded
+// post-regalloc) path for -O0 compilations. Fortunately this path can be
+// significantly more naive than the standard expansion: we conservatively
+// assume seq_cst, strong cmpxchg and omit clrex on failure.
+
+let Constraints = "@earlyclobber $Rd,@earlyclobber $status",
+ mayLoad = 1, mayStore = 1 in {
+def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+ (ins GPR:$addr, GPR:$desired, GPR:$new),
+ NoItinerary, []>, Sched<[]>;
+
+def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+ (ins GPR:$addr, GPR:$desired, GPR:$new),
+ NoItinerary, []>, Sched<[]>;
+
+def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+ (ins GPR:$addr, GPR:$desired, GPR:$new),
+ NoItinerary, []>, Sched<[]>;
+
+def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status),
+ (ins GPR:$addr, GPRPair:$desired, GPRPair:$new),
+ NoItinerary, []>, Sched<[]>;
+}
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 5b1f9a06442e..93a174f3678a 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -15,10 +15,6 @@
// Thumb specific DAG Nodes.
//
-def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-
def imm_sr_XFORM: SDNodeXForm<imm, [{
unsigned Imm = N->getZExtValue();
return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32);
@@ -70,6 +66,14 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
}]>;
+def imm256_510 : ImmLeaf<i32, [{
+ return Imm >= 256 && Imm < 511;
+}]>;
+
+def thumb_imm256_510_addend : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() - 255, SDLoc(N), MVT::i32);
+}]>;
+
// Scaled 4 immediate.
def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; }
def t_imm0_1020s4 : Operand<i32> {
@@ -121,26 +125,38 @@ def t_adrlabel : Operand<i32> {
let ParserMatchClass = UnsignedOffset_b8s2;
}
-def t_bcctarget : Operand<i32> {
- let EncoderMethod = "getThumbBCCTargetOpValue";
- let DecoderMethod = "DecodeThumbBCCTargetOperand";
-}
-def t_cbtarget : Operand<i32> {
- let EncoderMethod = "getThumbCBTargetOpValue";
- let DecoderMethod = "DecodeThumbCmpBROperand";
+def thumb_br_target : Operand<OtherVT> {
+ let ParserMatchClass = ThumbBranchTarget;
+ let EncoderMethod = "getThumbBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
-def t_bltarget : Operand<i32> {
+def thumb_bl_target : Operand<i32> {
+ let ParserMatchClass = ThumbBranchTarget;
let EncoderMethod = "getThumbBLTargetOpValue";
let DecoderMethod = "DecodeThumbBLTargetOperand";
}
-def t_blxtarget : Operand<i32> {
+// Target for BLX *from* thumb mode.
+def thumb_blx_target : Operand<i32> {
+ let ParserMatchClass = ARMBranchTarget;
let EncoderMethod = "getThumbBLXTargetOpValue";
let DecoderMethod = "DecodeThumbBLXOffset";
}
+def thumb_bcc_target : Operand<OtherVT> {
+ let ParserMatchClass = ThumbBranchTarget;
+ let EncoderMethod = "getThumbBCCTargetOpValue";
+ let DecoderMethod = "DecodeThumbBCCTargetOperand";
+}
+
+def thumb_cb_target : Operand<OtherVT> {
+ let ParserMatchClass = ThumbBranchTarget;
+ let EncoderMethod = "getThumbCBTargetOpValue";
+ let DecoderMethod = "DecodeThumbCmpBROperand";
+}
+
// t_addrmode_pc := <label> => pc + imm8 * 4
//
def t_addrmode_pc : MemOperand {
@@ -278,16 +294,17 @@ def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm",
let Inst{7-4} = imm;
}
-class tHintAlias<string Asm, dag Result> : tInstAlias<Asm, Result> {
+// Note: When EmitPriority == 1, the alias will be used for printing
+class tHintAlias<string Asm, dag Result, bit EmitPriority = 0> : tInstAlias<Asm, Result, EmitPriority> {
let Predicates = [IsThumb, HasV6M];
}
-def : tHintAlias<"nop$p", (tHINT 0, pred:$p)>; // A8.6.110
-def : tHintAlias<"yield$p", (tHINT 1, pred:$p)>; // A8.6.410
-def : tHintAlias<"wfe$p", (tHINT 2, pred:$p)>; // A8.6.408
-def : tHintAlias<"wfi$p", (tHINT 3, pred:$p)>; // A8.6.409
-def : tHintAlias<"sev$p", (tHINT 4, pred:$p)>; // A8.6.157
-def : tInstAlias<"sevl$p", (tHINT 5, pred:$p)> {
+def : tHintAlias<"nop$p", (tHINT 0, pred:$p), 1>; // A8.6.110
+def : tHintAlias<"yield$p", (tHINT 1, pred:$p), 1>; // A8.6.410
+def : tHintAlias<"wfe$p", (tHINT 2, pred:$p), 1>; // A8.6.408
+def : tHintAlias<"wfi$p", (tHINT 3, pred:$p), 1>; // A8.6.409
+def : tHintAlias<"sev$p", (tHINT 4, pred:$p), 1>; // A8.6.157
+def : tInstAlias<"sevl$p", (tHINT 5, pred:$p), 1> {
let Predicates = [IsThumb2, HasV8];
}
@@ -302,7 +319,7 @@ def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val",
let Inst{7-0} = val;
}
// default immediate for breakpoint mnemonic
-def : InstAlias<"bkpt", (tBKPT 0)>, Requires<[IsThumb]>;
+def : InstAlias<"bkpt", (tBKPT 0), 0>, Requires<[IsThumb]>;
def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val",
[]>, T1Encoding<0b101110>, Requires<[IsThumb, HasV8]> {
@@ -439,6 +456,14 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
let Inst{2-0} = 0b000;
let Unpredictable{2-0} = 0b111;
}
+ def tBXNS : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bxns${p}\t$Rm", []>,
+ Requires<[IsThumb, Has8MSecExt]>,
+ T1Special<{1,1,0,?}>, Sched<[WriteBr]> {
+ bits<4> Rm;
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b100;
+ let Unpredictable{1-0} = 0b11;
+ }
}
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
@@ -458,9 +483,9 @@ let isCall = 1,
Defs = [LR], Uses = [SP] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
- (outs), (ins pred:$p, t_bltarget:$func), IIC_Br,
+ (outs), (ins pred:$p, thumb_bl_target:$func), IIC_Br,
"bl${p}\t$func",
- [(ARMtcall tglobaladdr:$func)]>,
+ [(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb]>, Sched<[WriteBrL]> {
bits<24> func;
let Inst{26} = func{23};
@@ -472,9 +497,8 @@ let isCall = 1,
// ARMv5T and above, also used for Thumb2
def tBLXi : TIx2<0b11110, 0b11, 0,
- (outs), (ins pred:$p, t_blxtarget:$func), IIC_Br,
- "blx${p}\t$func",
- [(ARMcall tglobaladdr:$func)]>,
+ (outs), (ins pred:$p, thumb_blx_target:$func), IIC_Br,
+ "blx${p}\t$func", []>,
Requires<[IsThumb, HasV5T, IsNotMClass]>, Sched<[WriteBrL]> {
bits<24> func;
let Inst{26} = func{23};
@@ -488,7 +512,7 @@ let isCall = 1,
// Also used for Thumb2
def tBLXr : TI<(outs), (ins pred:$p, GPR:$func), IIC_Br,
"blx${p}\t$func",
- [(ARMtcall GPR:$func)]>,
+ [(ARMcall GPR:$func)]>,
Requires<[IsThumb, HasV5T]>,
T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24;
bits<4> func;
@@ -496,6 +520,17 @@ let isCall = 1,
let Inst{2-0} = 0b000;
}
+ // ARMv8-M Security Extensions
+ def tBLXNSr : TI<(outs), (ins pred:$p, GPRnopc:$func), IIC_Br,
+ "blxns${p}\t$func", []>,
+ Requires<[IsThumb, Has8MSecExt]>,
+ T1Special<{1,1,1,?}>, Sched<[WriteBrL]> {
+ bits<4> func;
+ let Inst{6-3} = func;
+ let Inst{2-0} = 0b100;
+ let Unpredictable{1-0} = 0b11;
+ }
+
// ARMv4T
def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func),
4, IIC_Br,
@@ -517,8 +552,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
// Just a pseudo for a tBL instruction. Needed to let regalloc know about
// the clobber of LR.
let Defs = [LR] in
- def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p),
- 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>,
+ def tBfar : tPseudoExpand<(outs), (ins thumb_bl_target:$target, pred:$p),
+ 4, IIC_Br, [],
+ (tBL pred:$p, thumb_bl_target:$target)>,
Sched<[WriteBrTbl]>;
def tBR_JTr : tPseudoInst<(outs),
@@ -534,7 +570,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
// a two-value operand where a dag node expects two operands. :(
let isBranch = 1, isTerminator = 1 in
- def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br,
+ def tBcc : T1I<(outs), (ins thumb_bcc_target:$target, pred:$p), IIC_Br,
"b${p}\t$target",
[/*(ARMbrcond bb:$target, imm:$cc)*/]>,
T1BranchCond<{1,1,0,1}>, Sched<[WriteBr]> {
@@ -663,19 +699,19 @@ multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr,
t_addrmode_is4, AddrModeT1_4,
IIC_iLoad_r, IIC_iLoad_i, "ldr",
- UnOpFrag<(load node:$Src)>>;
+ load>;
// A8.6.64 & A8.6.61
defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr,
t_addrmode_is1, AddrModeT1_1,
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
- UnOpFrag<(zextloadi8 node:$Src)>>;
+ zextloadi8>;
// A8.6.76 & A8.6.73
defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr,
t_addrmode_is2, AddrModeT1_2,
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
- UnOpFrag<(zextloadi16 node:$Src)>>;
+ zextloadi16>;
let AddedComplexity = 10 in
def tLDRSB : // A8.6.80
@@ -706,19 +742,19 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr,
t_addrmode_is4, AddrModeT1_4,
IIC_iStore_r, IIC_iStore_i, "str",
- BinOpFrag<(store node:$LHS, node:$RHS)>>;
+ store>;
// A8.6.197 & A8.6.195
defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr,
t_addrmode_is1, AddrModeT1_1,
IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
- BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+ truncstorei8>;
// A8.6.207 & A8.6.205
defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
t_addrmode_is2, AddrModeT1_2,
IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
- BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+ truncstorei16>;
//===----------------------------------------------------------------------===//
@@ -770,7 +806,7 @@ def tSTMIA_UPD : Thumb1I<(outs GPR:$wb),
} // hasSideEffects
def : InstAlias<"ldm${p} $Rn!, $regs",
- (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>,
+ (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs), 0>,
Requires<[IsThumb, IsThumb1Only]>;
let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
@@ -1310,7 +1346,14 @@ def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "",
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsThumb]>;
+ Requires<[IsThumb,IsNotWindows]>;
+
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
+ Defs = [ R11, LR, SP ] in
+def tInt_WIN_eh_sjlj_longjmp
+ : XI<(outs), (ins GPR:$src, GPR:$scratch), AddrModeNone, 0, IndexModeNone,
+ Pseudo, NoItinerary, "", "", [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsThumb,IsWindows]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -1380,16 +1423,9 @@ def : T1Pat<(ARMWrapperJT tjumptable:$dst),
(tLEApcrelJT tjumptable:$dst)>;
// Direct calls
-def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
+def : T1Pat<(ARMcall texternalsym:$func), (tBL texternalsym:$func)>,
Requires<[IsThumb]>;
-def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
- Requires<[IsThumb, HasV5T, IsNotMClass]>;
-
-// Indirect calls to ARM routines
-def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
- Requires<[IsThumb, HasV5T]>;
-
// zextload i1 -> zextload i8
def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
(tLDRBi t_addrmode_is1:$addr)>;
@@ -1415,6 +1451,24 @@ def : T1Pat<(extloadi8 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>;
def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
+// post-inc loads and stores
+
+// post-inc LDR -> LDM r0!, {r1}. The way operands are laid out in LDMs is
+// different to how ISel expects them for a post-inc load, so use a pseudo
+// and expand it just after ISel.
+let usesCustomInserter = 1,
+ Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in
+ def tLDR_postidx: tPseudoInst<(outs rGPR:$Rt, rGPR:$Rn_wb),
+ (ins rGPR:$Rn, pred:$p),
+ 4, IIC_iStore_ru,
+ []>;
+
+// post-inc STR -> STM r0!, {r1}. The layout of this (because it doesn't def
+// multiple registers) is the same in ISel as MachineInstr, so there's no need
+// for a pseudo.
+def : T1Pat<(post_store rGPR:$Rt, rGPR:$Rn, 4),
+ (tSTMIA_UPD rGPR:$Rn, rGPR:$Rt)>;
+
// If it's impossible to use [r,r] address mode for sextload, select to
// ldr{b|h} + sxt{b|h} instead.
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
@@ -1474,6 +1528,10 @@ def : T1Pat<(i32 thumb_immshifted:$src),
def : T1Pat<(i32 imm0_255_comp:$src),
(tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+def : T1Pat<(i32 imm256_510:$src),
+ (tADDi8 (tMOVi8 255),
+ (thumb_imm256_510_addend imm:$src))>;
+
// Pseudo instruction that combines ldr from constpool and add pc. This should
// be expanded into two instructions late to allow if-conversion and
// scheduling.
@@ -1502,7 +1560,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
// In Thumb1, "nop" is encoded as a "mov r8, r8". Technically, the bf00
// encoding is available on ARMv6K, but we don't differentiate that finely.
-def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
+def : InstAlias<"nop", (tMOVr R8, R8, 14, 0), 0>, Requires<[IsThumb, IsThumb1Only]>;
// For round-trip assembly/disassembly, we have to handle a CPS instruction
@@ -1524,3 +1582,8 @@ def : tInstAlias<"lsr${s}${p} $Rdm, $imm",
(tLSRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>;
def : tInstAlias<"asr${s}${p} $Rdm, $imm",
(tASRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>;
+
+// Pseudo instruction ldr Rt, =immediate
+def tLDRConstPool
+ : tAsmPseudo<"ldr${p} $Rt, $immediate",
+ (ins tGPR:$Rt, const_pool_asm_imm:$immediate, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index f42f4569b2f8..55e5308be40e 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -270,7 +270,7 @@ def t2addrmode_so_reg : MemOperand,
let EncoderMethod = "getT2AddrModeSORegOpValue";
let DecoderMethod = "DecodeT2AddrModeSOReg";
let ParserMatchClass = t2addrmode_so_reg_asmoperand;
- let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
+ let MIOperandInfo = (ops GPRnopc:$base, rGPR:$offsreg, i32imm:$offsimm);
}
// Addresses for the TBB/TBH instructions.
@@ -576,8 +576,8 @@ class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4,
/// changed to modify CPSR.
multiclass T2I_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0,
- string wide = ""> {
+ SDPatternOperator opnode, bit Commutable = 0,
+ string wide = ""> {
// shifted imm
def ri : T2sTwoRegImm<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
@@ -632,7 +632,7 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
// the ".w" suffix to indicate that they are wide.
multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> :
+ SDPatternOperator opnode, bit Commutable = 0> :
T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w"> {
// Assembler aliases w/ the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"),
@@ -661,7 +661,7 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
/// reversed. The 'rr' form is only defined for the disassembler; for codegen
/// it is equivalent to the T2I_bin_irs counterpart.
-multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, SDNode opnode> {
// shifted imm
def ri : T2sTwoRegImm<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
@@ -705,7 +705,7 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
let hasPostISelHook = 1, Defs = [CPSR] in {
multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
- InstrItinClass iis, PatFrag opnode,
+ InstrItinClass iis, SDNode opnode,
bit Commutable = 0> {
// shifted imm
def ri : t2PseudoInst<(outs rGPR:$Rd),
@@ -735,7 +735,7 @@ multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
/// T2I_rbin_s_is - Same as T2I_bin_s_irs, except selection DAG
/// operands are reversed.
let hasPostISelHook = 1, Defs = [CPSR] in {
-multiclass T2I_rbin_s_is<PatFrag opnode> {
+multiclass T2I_rbin_s_is<SDNode opnode> {
// shifted imm
def ri : t2PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$Rn, t2_so_imm:$imm, pred:$p),
@@ -755,7 +755,7 @@ multiclass T2I_rbin_s_is<PatFrag opnode> {
/// T2I_bin_ii12rs - Defines a set of (op reg, {so_imm|imm0_4095|r|so_reg})
/// patterns for a binary operation that produces a value.
-multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
+multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, SDNode opnode,
bit Commutable = 0> {
// shifted imm
// The register-immediate version is re-materializable. This is useful
@@ -824,7 +824,7 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
/// for a binary operation that produces a value and use the carry
/// bit. It's not predicable.
let Defs = [CPSR], Uses = [CPSR] in {
-multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, SDNode opnode,
bit Commutable = 0> {
// shifted imm
def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
@@ -864,7 +864,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
// rotate operation that produces a value.
-multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
+multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, SDNode opnode> {
// 5-bit imm
def ri : T2sTwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
@@ -919,7 +919,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
/// an explicit result, only implicitly set CPSR.
multiclass T2I_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode> {
+ SDPatternOperator opnode> {
let isCompare = 1, Defs = [CPSR] in {
// shifted imm
def ri : T2OneRegCmpImm<
@@ -1260,20 +1260,19 @@ def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
// Load
let canFoldAsLoad = 1, isReMaterializable = 1 in
-defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR,
- UnOpFrag<(load node:$Src)>>;
+defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR, load>;
// Loads with zero extension
defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- GPRnopc, UnOpFrag<(zextloadi16 node:$Src)>>;
+ GPRnopc, zextloadi16>;
defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- GPRnopc, UnOpFrag<(zextloadi8 node:$Src)>>;
+ GPRnopc, zextloadi8>;
// Loads with sign extension
defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- GPRnopc, UnOpFrag<(sextloadi16 node:$Src)>>;
+ GPRnopc, sextloadi16>;
defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- GPRnopc, UnOpFrag<(sextloadi8 node:$Src)>>;
+ GPRnopc, sextloadi8>;
let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
@@ -1414,7 +1413,7 @@ def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary,
- opc, asm, "", pattern>, Requires<[IsThumb, HasV8]> {
+ opc, asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]> {
bits<4> Rt;
bits<4> addr;
@@ -1438,12 +1437,11 @@ def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt),
(ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>;
// Store
-defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR,
- BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, store>;
defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
- rGPR, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+ rGPR, truncstorei8>;
defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
- rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+ rGPR, truncstorei16>;
// Store doubleword
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
@@ -1586,7 +1584,7 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
class T2Istrrel<bits<2> bit54, dag oops, dag iops,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc,
- asm, "", pattern>, Requires<[IsThumb, HasV8]> {
+ asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]> {
bits<4> Rt;
bits<4> addr;
@@ -1906,7 +1904,8 @@ def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
"movw", "\t$Rd, $imm",
- [(set rGPR:$Rd, imm0_65535:$imm)]>, Sched<[WriteALU]> {
+ [(set rGPR:$Rd, imm0_65535:$imm)]>, Sched<[WriteALU]>,
+ Requires<[IsThumb, HasV8MBaseline]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0010;
@@ -1924,8 +1923,9 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
let DecoderMethod = "DecodeT2MOVTWInstruction";
}
-def : t2InstAlias<"mov${p} $Rd, $imm",
- (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p)>;
+def : InstAlias<"mov${p} $Rd, $imm",
+ (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p), 0>,
+ Requires<[IsThumb, HasV8MBaseline]>;
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
@@ -1936,7 +1936,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd),
"movt", "\t$Rd, $imm",
[(set rGPR:$Rd,
(or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]>,
- Sched<[WriteALU]> {
+ Sched<[WriteALU]>,
+ Requires<[IsThumb, HasV8MBaseline]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0110;
@@ -1956,7 +1957,7 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd),
def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
- Sched<[WriteALU]>;
+ Sched<[WriteALU]>, Requires<[IsThumb, HasV8MBaseline]>;
} // Constraints
def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
@@ -1997,7 +1998,7 @@ def t2UXTB : T2I_ext_rrot<0b101, "uxtb",
def t2UXTH : T2I_ext_rrot<0b001, "uxth",
UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
- UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+ UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
// The transformation should probably be done as a combiner action
@@ -2029,10 +2030,8 @@ def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
// Arithmetic Instructions.
//
-defm t2ADD : T2I_bin_ii12rs<0b000, "add",
- BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
-defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
- BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+defm t2ADD : T2I_bin_ii12rs<0b000, "add", add, 1>;
+defm t2SUB : T2I_bin_ii12rs<0b101, "sub", sub>;
// ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
//
@@ -2044,25 +2043,20 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
// support for an optional CPSR definition that corresponds to the DAG
// node's second value. We can then eliminate the implicit def of CPSR.
-defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi,
- BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
-defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi,
- BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, ARMaddc, 1>;
+defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, ARMsubc>;
let hasPostISelHook = 1 in {
-defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
- BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>;
-defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
- BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>;
+defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>;
+defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>;
}
// RSB
-defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
- BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+defm t2RSB : T2I_rbin_irs <0b1110, "rsb", sub>;
// FIXME: Eliminate them if we can write def : Pat patterns which defines
// CPSR and the implicit def of CPSR is not needed.
-defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm t2RSBS : T2I_rbin_s_is <ARMsubc>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
@@ -2293,19 +2287,17 @@ def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>;
+def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
+ (t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
//
-defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31,
- BinOpFrag<(shl node:$LHS, node:$RHS)>>;
-defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr,
- BinOpFrag<(srl node:$LHS, node:$RHS)>>;
-defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr,
- BinOpFrag<(sra node:$LHS, node:$RHS)>>;
-defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31,
- BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
+defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, shl>;
+defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, srl>;
+defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, sra>;
+defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, rotr>;
// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
@@ -2362,14 +2354,11 @@ def t2MOVsra_flag : T2TwoRegShiftImm<
//
defm t2AND : T2I_bin_w_irs<0b0000, "and",
- IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi, and, 1>;
defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
- IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi, or, 1>;
defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
- IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi, xor, 1>;
defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
@@ -2516,7 +2505,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <0b0011, "mvn",
IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
- UnOpFrag<(not node:$Src)>, 1, 1, 1>;
+ not, 1, 1, 1>;
let AddedComplexity = 1 in
def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
@@ -2606,8 +2595,9 @@ def t2UMLAL : T2MlaLong<0b110, 0b0000,
def t2UMAAL : T2MulLong<0b110, 0b0110,
(outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
Requires<[IsThumb2, HasDSP]>;
} // hasSideEffects
@@ -2677,7 +2667,7 @@ def t2SMMLSR:T2FourReg<
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
-multiclass T2I_smul<string opc, PatFrag opnode> {
+multiclass T2I_smul<string opc, SDNode opnode> {
def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
@@ -2756,7 +2746,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
}
-multiclass T2I_smla<string opc, PatFrag opnode> {
+multiclass T2I_smla<string opc, SDNode opnode> {
def BB : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
@@ -2835,8 +2825,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
}
}
-defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
-defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm t2SMUL : T2I_smul<"smul", mul>;
+defm t2SMLA : T2I_smla<"smla", mul>;
// Halfword multiple accumulate long: SMLAL<x><y>
def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
@@ -2923,7 +2913,7 @@ def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb2]> {
+ Requires<[HasDivide, IsThumb, HasV8MBaseline]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
let Inst{20} = 0b1;
@@ -2934,7 +2924,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb2]> {
+ Requires<[HasDivide, IsThumb, HasV8MBaseline]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
let Inst{20} = 0b1;
@@ -3080,8 +3070,7 @@ def t2CRC32CW : T2I_crc32<1, 0b10, "cw", int_arm_crc32cw>;
// Comparison Instructions...
//
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
- IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
- BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, ARMcmp>;
def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm),
(t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>;
@@ -3288,15 +3277,18 @@ let mayLoad = 1 in {
def t2LDREXB : T2I_ldrex<0b0100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexb", "\t$Rt, $addr", "",
- [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>;
+ [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>,
+ Requires<[IsThumb, HasV8MBaseline]>;
def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexh", "\t$Rt, $addr", "",
- [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>;
+ [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>,
+ Requires<[IsThumb, HasV8MBaseline]>;
def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
AddrModeNone, 4, NoItinerary,
"ldrex", "\t$Rt, $addr", "",
- [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]> {
+ [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>,
+ Requires<[IsThumb, HasV8MBaseline]> {
bits<4> Rt;
bits<12> addr;
let Inst{31-27} = 0b11101;
@@ -3320,17 +3312,17 @@ def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldaexb", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasV8]>;
+ Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>;
def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldaexh", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasV8]>;
+ Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>;
def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldaex", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasV8]> {
+ Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> {
bits<4> Rt;
bits<4> addr;
let Inst{31-27} = 0b11101;
@@ -3345,7 +3337,8 @@ def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2),
(ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldaexd", "\t$Rt, $Rt2, $addr", "",
- [], {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> {
+ [], {?, ?, ?, ?}>, Requires<[IsThumb,
+ HasAcquireRelease, HasV7Clrex, IsNotMClass]> {
bits<4> Rt2;
let Inst{11-8} = Rt2;
@@ -3359,20 +3352,23 @@ def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd),
AddrModeNone, 4, NoItinerary,
"strexb", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd,
- (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>;
+ (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>,
+ Requires<[IsThumb, HasV8MBaseline]>;
def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd),
(ins rGPR:$Rt, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexh", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd,
- (strex_2 rGPR:$Rt, addr_offset_none:$addr))]>;
+ (strex_2 rGPR:$Rt, addr_offset_none:$addr))]>,
+ Requires<[IsThumb, HasV8MBaseline]>;
def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
t2addrmode_imm0_1020s4:$addr),
AddrModeNone, 4, NoItinerary,
"strex", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd,
- (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]> {
+ (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>,
+ Requires<[IsThumb, HasV8MBaseline]> {
bits<4> Rd;
bits<4> Rt;
bits<12> addr;
@@ -3399,7 +3395,8 @@ def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd),
"stlexb", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd,
(stlex_1 rGPR:$Rt, addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasV8]>;
+ Requires<[IsThumb, HasAcquireRelease,
+ HasV7Clrex]>;
def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd),
(ins rGPR:$Rt, addr_offset_none:$addr),
@@ -3407,7 +3404,8 @@ def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd),
"stlexh", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd,
(stlex_2 rGPR:$Rt, addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasV8]>;
+ Requires<[IsThumb, HasAcquireRelease,
+ HasV7Clrex]>;
def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
addr_offset_none:$addr),
@@ -3415,7 +3413,7 @@ def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
"stlex", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd,
(stlex_4 rGPR:$Rt, addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasV8]> {
+ Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> {
bits<4> Rd;
bits<4> Rt;
bits<4> addr;
@@ -3431,14 +3429,15 @@ def t2STLEXD : T2I_strex<0b1111, (outs rGPR:$Rd),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
- {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> {
+ {?, ?, ?, ?}>, Requires<[IsThumb, HasAcquireRelease,
+ HasV7Clrex, IsNotMClass]> {
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
}
def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>,
- Requires<[IsThumb2, HasV7]> {
+ Requires<[IsThumb, HasV7Clrex]> {
let Inst{31-16} = 0xf3bf;
let Inst{15-14} = 0b10;
let Inst{13} = 0;
@@ -3449,22 +3448,30 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>,
}
def : T2Pat<(and (ldrex_1 addr_offset_none:$addr), 0xff),
- (t2LDREXB addr_offset_none:$addr)>;
+ (t2LDREXB addr_offset_none:$addr)>,
+ Requires<[IsThumb, HasV8MBaseline]>;
def : T2Pat<(and (ldrex_2 addr_offset_none:$addr), 0xffff),
- (t2LDREXH addr_offset_none:$addr)>;
+ (t2LDREXH addr_offset_none:$addr)>,
+ Requires<[IsThumb, HasV8MBaseline]>;
def : T2Pat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr),
- (t2STREXB GPR:$Rt, addr_offset_none:$addr)>;
+ (t2STREXB GPR:$Rt, addr_offset_none:$addr)>,
+ Requires<[IsThumb, HasV8MBaseline]>;
def : T2Pat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
- (t2STREXH GPR:$Rt, addr_offset_none:$addr)>;
+ (t2STREXH GPR:$Rt, addr_offset_none:$addr)>,
+ Requires<[IsThumb, HasV8MBaseline]>;
def : T2Pat<(and (ldaex_1 addr_offset_none:$addr), 0xff),
- (t2LDAEXB addr_offset_none:$addr)>;
+ (t2LDAEXB addr_offset_none:$addr)>,
+ Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>;
def : T2Pat<(and (ldaex_2 addr_offset_none:$addr), 0xffff),
- (t2LDAEXH addr_offset_none:$addr)>;
+ (t2LDAEXH addr_offset_none:$addr)>,
+ Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>;
def : T2Pat<(stlex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr),
- (t2STLEXB GPR:$Rt, addr_offset_none:$addr)>;
+ (t2STLEXB GPR:$Rt, addr_offset_none:$addr)>,
+ Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>;
def : T2Pat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
- (t2STLEXH GPR:$Rt, addr_offset_none:$addr)>;
+ (t2STLEXH GPR:$Rt, addr_offset_none:$addr)>,
+ Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>;
//===----------------------------------------------------------------------===//
// SJLJ Exception handling intrinsics
@@ -3517,9 +3524,10 @@ def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
-def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
+def t2B : T2I<(outs), (ins thumb_br_target:$target), IIC_Br,
"b", ".w\t$target",
- [(br bb:$target)]>, Sched<[WriteBr]> {
+ [(br bb:$target)]>, Sched<[WriteBr]>,
+ Requires<[IsThumb, HasV8MBaseline]> {
let Inst{31-27} = 0b11110;
let Inst{15-14} = 0b10;
let Inst{12} = 1;
@@ -3609,9 +3617,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS version.
let Uses = [SP] in
def tTAILJMPd: tPseudoExpand<(outs),
- (ins uncondbrtarget:$dst, pred:$p),
+ (ins thumb_br_target:$dst, pred:$p),
4, IIC_Br, [],
- (t2B uncondbrtarget:$dst, pred:$p)>,
+ (t2B thumb_br_target:$dst, pred:$p)>,
Requires<[IsThumb2, IsMachO]>, Sched<[WriteBr]>;
}
@@ -3647,10 +3655,10 @@ def t2BXJ : T2I<(outs), (ins GPRnopc:$func), NoItinerary, "bxj", "\t$func", []>,
// Compare and branch on zero / non-zero
let isBranch = 1, isTerminator = 1 in {
- def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ def tCBZ : T1I<(outs), (ins tGPR:$Rn, thumb_cb_target:$target), IIC_Br,
"cbz\t$Rn, $target", []>,
T1Misc<{0,0,?,1,?,?,?}>,
- Requires<[IsThumb2]>, Sched<[WriteBr]> {
+ Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteBr]> {
// A8.6.27
bits<6> target;
bits<3> Rn;
@@ -3659,10 +3667,10 @@ let isBranch = 1, isTerminator = 1 in {
let Inst{2-0} = Rn;
}
- def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ def tCBNZ : T1I<(outs), (ins tGPR:$Rn, thumb_cb_target:$target), IIC_Br,
"cbnz\t$Rn, $target", []>,
T1Misc<{1,0,?,1,?,?,?}>,
- Requires<[IsThumb2]>, Sched<[WriteBr]> {
+ Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteBr]> {
// A8.6.27
bits<6> target;
bits<3> Rn;
@@ -3715,15 +3723,21 @@ def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",
let Inst{7-0} = imm;
}
-def : t2InstAlias<"hint$p $imm", (t2HINT imm0_239:$imm, pred:$p)>;
-def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
-def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
-def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
-def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>;
-def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>;
-def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p)> {
+def : t2InstAlias<"hint$p $imm", (t2HINT imm0_239:$imm, pred:$p), 0>;
+def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p), 1>;
+def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p), 1>;
+def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p), 1>;
+def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p), 1>;
+def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p), 1>;
+def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p), 1> {
let Predicates = [IsThumb2, HasV8];
}
+def : t2InstAlias<"esb$p.w", (t2HINT 16, pred:$p), 1> {
+ let Predicates = [IsThumb2, HasRAS];
+}
+def : t2InstAlias<"esb$p", (t2HINT 16, pred:$p), 0> {
+ let Predicates = [IsThumb2, HasRAS];
+}
def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt",
[(int_arm_dbg imm0_15:$opt)]> {
@@ -3848,7 +3862,7 @@ def : t2InstAlias<"hvc\t$imm16", (t2HVC imm0_65535:$imm16)>;
// ERET - Return from exception in Hypervisor mode.
// B9.3.3, B9.3.20: ERET is an alias for "SUBS PC, LR, #0" in an implementation that
// includes virtualization extensions.
-def t2ERET : InstAlias<"eret${p}", (t2SUBS_PC_LR 0, pred:$p)>,
+def t2ERET : InstAlias<"eret${p}", (t2SUBS_PC_LR 0, pred:$p), 1>,
Requires<[IsThumb2, HasVirtualization]>;
//===----------------------------------------------------------------------===//
@@ -3871,7 +3885,7 @@ let isReMaterializable = 1 in {
def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
IIC_iMOVix2addpc,
[(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
- Requires<[IsThumb2, UseMovt]>;
+ Requires<[IsThumb, HasV8MBaseline, UseMovt]>;
}
@@ -3883,12 +3897,13 @@ def : T2Pat<(ARMWrapper tglobaltlsaddr:$dst),
Requires<[IsThumb2, UseMovt]>;
// ConstantPool, GlobalAddress, and JumpTable
-def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
-def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
- Requires<[IsThumb2, UseMovt]>;
+def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
+def : T2Pat<(ARMWrapper texternalsym :$dst), (t2MOVi32imm texternalsym :$dst)>,
+ Requires<[IsThumb, HasV8MBaseline, UseMovt]>;
+def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsThumb, HasV8MBaseline, UseMovt]>;
-def : T2Pat<(ARMWrapperJT tjumptable:$dst),
- (t2LEApcrelJT tjumptable:$dst)>;
+def : T2Pat<(ARMWrapperJT tjumptable:$dst), (t2LEApcrelJT tjumptable:$dst)>;
// Pseudo instruction that combines ldr from constpool and add pc. This should
// be expanded into two instructions late to allow if-conversion and
@@ -3910,16 +3925,16 @@ def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
//===----------------------------------------------------------------------===//
// Coprocessor load/store -- for disassembly only
//
-class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm>
- : T2I<oops, iops, NoItinerary, opc, asm, []> {
+class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, NoItinerary, opc, asm, pattern> {
let Inst{31-28} = op31_28;
let Inst{27-25} = 0b110;
}
-multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> {
+multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm, list<dag> pattern> {
def _OFFSET : T2CI<op31_28,
(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
- asm, "\t$cop, $CRd, $addr"> {
+ asm, "\t$cop, $CRd, $addr", pattern> {
bits<13> addr;
bits<4> cop;
bits<4> CRd;
@@ -3936,7 +3951,7 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> {
}
def _PRE : T2CI<op31_28,
(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
- asm, "\t$cop, $CRd, $addr!"> {
+ asm, "\t$cop, $CRd, $addr!", []> {
bits<13> addr;
bits<4> cop;
bits<4> CRd;
@@ -3954,7 +3969,7 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> {
def _POST: T2CI<op31_28,
(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
postidx_imm8s4:$offset),
- asm, "\t$cop, $CRd, $addr, $offset"> {
+ asm, "\t$cop, $CRd, $addr, $offset", []> {
bits<9> offset;
bits<4> addr;
bits<4> cop;
@@ -3973,7 +3988,7 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> {
def _OPTION : T2CI<op31_28, (outs),
(ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr,
coproc_option_imm:$option),
- asm, "\t$cop, $CRd, $addr, $option"> {
+ asm, "\t$cop, $CRd, $addr, $option", []> {
bits<8> option;
bits<4> addr;
bits<4> cop;
@@ -3991,14 +4006,15 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> {
}
}
-defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">;
-defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">;
-defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">;
-defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">;
-defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8,IsThumb2]>;
-defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8,IsThumb2]>;
-defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8,IsThumb2]>;
-defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8,IsThumb2]>;
+defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
+defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
+defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
+
+defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
+defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
+defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
//===----------------------------------------------------------------------===//
@@ -4070,6 +4086,7 @@ def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$SYSm), NoItinerary,
// same and the assembly parser has no way to distinguish between them. The mask
// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
// the mask with the fields to be accessed in the special register.
+let Defs = [CPSR] in
def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
NoItinerary, "msr", "\t$mask, $Rn", []>,
Requires<[IsThumb2,IsNotMClass]> {
@@ -4105,6 +4122,7 @@ def t2MSRbanked : T2I<(outs), (ins banked_reg:$banked, rGPR:$Rn),
// M class MSR.
//
// Move from ARM core register to Special Register
+let Defs = [CPSR] in
def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn),
NoItinerary, "msr", "\t$SYSm, $Rn", []>,
Requires<[IsThumb,IsMClass]> {
@@ -4314,6 +4332,37 @@ def t2SETPAN : T1I<(outs), (ins imm0_1:$imm), NoItinerary, "setpan\t$imm", []>,
}
//===----------------------------------------------------------------------===//
+// ARMv8-M Security Extensions instructions
+//
+
+let hasSideEffects = 1 in
+def t2SG : T2I<(outs), (ins), NoItinerary, "sg", "", []>,
+ Requires<[Has8MSecExt]> {
+ let Inst = 0xe97fe97f;
+}
+
+class T2TT<bits<2> at, string asm, list<dag> pattern>
+ : T2I<(outs rGPR:$Rt), (ins GPRnopc:$Rn), NoItinerary, asm, "\t$Rt, $Rn",
+ pattern> {
+ bits<4> Rn;
+ bits<4> Rt;
+
+ let Inst{31-20} = 0b111010000100;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = Rt;
+ let Inst{7-6} = at;
+ let Inst{5-0} = 0b000000;
+
+ let Unpredictable{5-0} = 0b111111;
+}
+
+def t2TT : T2TT<0b00, "tt", []>, Requires<[IsThumb,Has8MSecExt]>;
+def t2TTT : T2TT<0b01, "ttt", []>, Requires<[IsThumb,Has8MSecExt]>;
+def t2TTA : T2TT<0b10, "tta", []>, Requires<[IsThumb,Has8MSecExt]>;
+def t2TTAT : T2TT<0b11, "ttat", []>, Requires<[IsThumb,Has8MSecExt]>;
+
+//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
@@ -4488,9 +4537,9 @@ def : t2InstAlias<"tst${p} $Rn, $Rm",
(t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
// Memory barriers
-def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p)>, Requires<[HasDB]>;
-def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p)>, Requires<[HasDB]>;
-def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p)>, Requires<[HasDB]>;
+def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p), 0>, Requires<[HasDB]>;
+def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p), 0>, Requires<[HasDB]>;
+def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p), 0>, Requires<[HasDB]>;
// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
// width specifier.
@@ -4535,13 +4584,13 @@ def : t2InstAlias<"mvn${s}${p} $Rd, $Rm",
def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm",
(t2MVNs rGPR:$Rd, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>;
-// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the
-// shift amount is zero (i.e., unspecified).
+// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT with the
+// input operands swapped when the shift amount is zero (i.e., unspecified).
def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
- (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
- (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ (t2PKHBT rGPR:$Rd, rGPR:$Rm, rGPR:$Rn, 0, pred:$p), 0>,
Requires<[HasT2ExtractPack, IsThumb2]>;
// PUSH/POP aliases for STM/LDM
@@ -4620,16 +4669,16 @@ def : t2InstAlias<"strh${p} $Rt, $addr",
// Extend instruction optional rotate operand.
def : InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
- (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
- (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
- (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : InstAlias<"sxtb16${p} $Rd, $Rm",
- (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>,
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : t2InstAlias<"sxtb${p} $Rd, $Rm",
@@ -4642,16 +4691,16 @@ def : t2InstAlias<"sxth${p}.w $Rd, $Rm",
(t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
def : InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
- (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
- (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
- (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>,
+ (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : InstAlias<"uxtb16${p} $Rd, $Rm",
- (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>,
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : t2InstAlias<"uxtb${p} $Rd, $Rm",
@@ -4667,7 +4716,7 @@ def : t2InstAlias<"uxth${p}.w $Rd, $Rm",
def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot",
(t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : InstAlias<"uxtb16${p} $Rd, $Rm$rot",
- (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>,
+ (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : t2InstAlias<"uxth${p} $Rd, $Rm$rot",
(t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
@@ -4675,7 +4724,7 @@ def : t2InstAlias<"uxth${p} $Rd, $Rm$rot",
def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot",
(t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : InstAlias<"sxtb16${p} $Rd, $Rm$rot",
- (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>,
+ (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>,
Requires<[HasT2ExtractPack, IsThumb2]>;
def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
(t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
@@ -4764,9 +4813,14 @@ def : t2InstAlias<"ldrsh${p}.w $Rt, $addr",
def : t2InstAlias<"add${p} $Rd, pc, $imm",
(t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>;
+// Pseudo instruction ldr Rt, =immediate
+def t2LDRConstPool
+ : t2AsmPseudo<"ldr${p} $Rt, $immediate",
+ (ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>;
+
// PLD/PLDW/PLI with alternate literal form.
def : t2InstAlias<"pld${p} $addr",
(t2PLDpci t2ldr_pcrel_imm12:$addr, pred:$p)>;
def : InstAlias<"pli${p} $addr",
- (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p)>,
+ (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p), 0>,
Requires<[IsThumb2,HasV7]>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 63e7940bb14e..e29d265ae3d1 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -30,6 +30,18 @@ def FPImmOperand : AsmOperandClass {
let ParserMethod = "parseFPImm";
}
+def vfp_f16imm : Operand<f16>,
+ PatLeaf<(f16 fpimm), [{
+ return ARM_AM::getFP16Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP16Imm(InVal);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+ }]>> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
+
def vfp_f32imm : Operand<f32>,
PatLeaf<(f32 fpimm), [{
return ARM_AM::getFP32Imm(N->getValueAPF()) != -1;
@@ -98,6 +110,11 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
let D = VFPNeonDomain;
}
+def VLDRH : AHI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5fp16:$addr),
+ IIC_fpLoad16, "vldr", ".16\t$Sd, $addr",
+ []>,
+ Requires<[HasFullFP16]>;
+
} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
@@ -112,6 +129,11 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
let D = VFPNeonDomain;
}
+def VSTRH : AHI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5fp16:$addr),
+ IIC_fpStore16, "vstr", ".16\t$Sd, $addr",
+ []>,
+ Requires<[HasFullFP16]>;
+
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
@@ -200,6 +222,37 @@ defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>;
def : MnemonicAlias<"vldm", "vldmia">;
def : MnemonicAlias<"vstm", "vstmia">;
+
+//===----------------------------------------------------------------------===//
+// Lazy load / store multiple Instructions
+//
+let mayLoad = 1 in
+def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
+ IIC_fpLoad_m, "vlldm${p}\t$Rn", "", []>,
+ Requires<[HasV8MMainline, Has8MSecExt]> {
+ let Inst{24-23} = 0b00;
+ let Inst{22} = 0;
+ let Inst{21} = 1;
+ let Inst{20} = 1;
+ let Inst{15-12} = 0;
+ let Inst{7-0} = 0;
+ let mayLoad = 1;
+}
+
+let mayStore = 1 in
+def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
+ IIC_fpStore_m, "vlstm${p}\t$Rn", "", []>,
+ Requires<[HasV8MMainline, Has8MSecExt]> {
+ let Inst{24-23} = 0b00;
+ let Inst{22} = 0;
+ let Inst{21} = 1;
+ let Inst{20} = 0;
+ let Inst{15-12} = 0;
+ let Inst{7-0} = 0;
+ let mayStore = 1;
+}
+
+
// FLDM/FSTM - Load / Store multiple single / double precision registers for
// pre-ARMv6 cores.
// These instructions are deprecated!
@@ -221,13 +274,13 @@ def : VFP2MnemonicAlias<"fstmdbd", "vstmdb">;
def : VFP2MnemonicAlias<"fstmead", "vstmia">;
def : VFP2MnemonicAlias<"fstmfdd", "vstmdb">;
-def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>,
+def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r), 0>,
Requires<[HasVFP2]>;
-def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>,
+def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r), 0>,
Requires<[HasVFP2]>;
-def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
+def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r), 0>,
Requires<[HasVFP2]>;
-def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
+def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r), 0>,
Requires<[HasVFP2]>;
defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
(VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
@@ -295,6 +348,12 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0,
let D = VFPNeonA8Domain;
}
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
+def VADDH : AHbI<0b11100, 0b11, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
+ []>;
+
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
@@ -311,6 +370,12 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
let D = VFPNeonA8Domain;
}
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
+def VSUBH : AHbI<0b11100, 0b11, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
+ []>;
+
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
@@ -323,6 +388,12 @@ def VDIVS : ASbI<0b11101, 0b00, 0, 0,
IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
+def VDIVH : AHbI<0b11101, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm",
+ []>;
+
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VMULD : ADbI<0b11100, 0b10, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
@@ -339,6 +410,12 @@ def VMULS : ASbIn<0b11100, 0b10, 0, 0,
let D = VFPNeonA8Domain;
}
+let TwoOperandAliasConstraint = "$Sn = $Sd" in
+def VMULH : AHbI<0b11100, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm",
+ []>;
+
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
@@ -353,9 +430,20 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
let D = VFPNeonA8Domain;
}
+def VNMULH : AHbI<0b11100, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm",
+ []>;
+
multiclass vsel_inst<string op, bits<2> opc, int CC> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
Uses = [CPSR], AddedComplexity = 4 in {
+ def H : AHbInp<0b11100, opc, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"),
+ []>,
+ Requires<[HasFullFP16]>;
+
def S : ASbInp<0b11100, opc, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
@@ -378,6 +466,12 @@ defm VSELVS : vsel_inst<"vs", 0b01, 6>;
multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
+ def H : AHbInp<0b11101, 0b00, opc,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ NoItinerary, !strconcat(op, ".f16\t$Sd, $Sn, $Sm"),
+ []>,
+ Requires<[HasFullFP16]>;
+
def S : ASbInp<0b11101, 0b00, opc,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"),
@@ -418,6 +512,12 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
let D = VFPNeonA8Domain;
}
+def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
+ []>;
+
+
// FIXME: Verify encoding after integrated assembler is working.
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
@@ -432,6 +532,11 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
+
+def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm",
+ []>;
} // Defs = [FPSCR_NZCV]
//===----------------------------------------------------------------------===//
@@ -452,6 +557,11 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
let D = VFPNeonA8Domain;
}
+def VABSH : AHuI<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA16, "vabs", ".f16\t$Sd, $Sm",
+ []>;
+
let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
@@ -473,6 +583,14 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
let D = VFPNeonA8Domain;
}
+def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0",
+ []> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
// FIXME: Verify encoding after integrated assembler is working.
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins DPR:$Dd),
@@ -493,6 +611,14 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
+
+def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0",
+ []> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
} // Defs = [FPSCR_NZCV]
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
@@ -627,6 +753,22 @@ def : Pat<(f64 (f16_to_fp GPR:$a)),
multiclass vcvt_inst<string opc, bits<2> rm,
SDPatternOperator node = null_frag> {
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+ def SH : AHuInp<0b11101, 0b11, 0b1100, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ NoItinerary, !strconcat("vcvt", opc, ".s32.f16\t$Sd, $Sm"),
+ []>,
+ Requires<[HasFullFP16]> {
+ let Inst{17-16} = rm;
+ }
+
+ def UH : AHuInp<0b11101, 0b11, 0b1100, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ NoItinerary, !strconcat("vcvt", opc, ".u32.f16\t$Sd, $Sm"),
+ []>,
+ Requires<[HasFullFP16]> {
+ let Inst{17-16} = rm;
+ }
+
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
@@ -715,7 +857,21 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
let D = VFPNeonA8Domain;
}
+def VNEGH : AHuI<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA16, "vneg", ".f16\t$Sd, $Sm",
+ []>;
+
multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
+ def H : AHuI<0b11101, 0b11, 0b0110, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ NoItinerary, !strconcat("vrint", opc), ".f16\t$Sd, $Sm",
+ []>,
+ Requires<[HasFullFP16]> {
+ let Inst{7} = op2;
+ let Inst{16} = op;
+ }
+
def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm",
@@ -733,11 +889,14 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
let Inst{16} = op;
}
+ def : InstAlias<!strconcat("vrint", opc, "$p.f16.f16\t$Sd, $Sm"),
+ (!cast<Instruction>(NAME#"H") SPR:$Sd, SPR:$Sm, pred:$p), 0>,
+ Requires<[HasFullFP16]>;
def : InstAlias<!strconcat("vrint", opc, "$p.f32.f32\t$Sd, $Sm"),
- (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p)>,
+ (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p), 0>,
Requires<[HasFPARMv8]>;
def : InstAlias<!strconcat("vrint", opc, "$p.f64.f64\t$Dd, $Dm"),
- (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p)>,
+ (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p), 0>,
Requires<[HasFPARMv8,HasDPVFP]>;
}
@@ -748,6 +907,13 @@ defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>;
multiclass vrint_inst_anpm<string opc, bits<2> rm,
SDPatternOperator node = null_frag> {
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+ def H : AHuInp<0b11101, 0b11, 0b1000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ NoItinerary, !strconcat("vrint", opc, ".f16\t$Sd, $Sm"),
+ []>,
+ Requires<[HasFullFP16]> {
+ let Inst{17-16} = rm;
+ }
def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"),
@@ -765,10 +931,10 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm,
}
def : InstAlias<!strconcat("vrint", opc, ".f32.f32\t$Sd, $Sm"),
- (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm)>,
+ (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm), 0>,
Requires<[HasFPARMv8]>;
def : InstAlias<!strconcat("vrint", opc, ".f64.f64\t$Dd, $Dm"),
- (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>,
+ (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm), 0>,
Requires<[HasFPARMv8,HasDPVFP]>;
}
@@ -787,6 +953,11 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
[(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
+def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpSQRT16, "vsqrt", ".f16\t$Sd, $Sm",
+ []>;
+
let hasSideEffects = 0 in {
def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
@@ -795,6 +966,18 @@ def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+
+let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+def VMOVH : ASuInp<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA16, "vmovx.f16\t$Sd, $Sm", []>,
+ Requires<[HasFullFP16]>;
+
+def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA16, "vins.f16\t$Sd, $Sm", []>,
+ Requires<[HasFullFP16]>;
+} // PostEncoderMethod
} // hasSideEffects
//===----------------------------------------------------------------------===//
@@ -966,6 +1149,44 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
let DecoderMethod = "DecodeVMOVSRR";
}
+// Move H->R, clearing top 16 bits
+def VMOVRH : AVConv2I<0b11100001, 0b1001,
+ (outs GPR:$Rt), (ins SPR:$Sn),
+ IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
+ []>,
+ Requires<[HasFullFP16]> {
+ // Instruction operands.
+ bits<4> Rt;
+ bits<5> Sn;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+}
+
+// Move R->H, clearing top 16 bits
+def VMOVHR : AVConv4I<0b11100000, 0b1001,
+ (outs SPR:$Sn), (ins GPR:$Rt),
+ IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
+ []>,
+ Requires<[HasFullFP16]> {
+ // Instruction operands.
+ bits<5> Sn;
+ bits<4> Rt;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+}
+
// FMRDH: SPR -> GPR
// FMRDL: SPR -> GPR
// FMRRS: SPR -> GPR
@@ -1011,6 +1232,25 @@ class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{22} = Sd{0};
}
+class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Predicates = [HasFullFP16];
+}
+
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
@@ -1043,6 +1283,13 @@ def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VSITOS (VLDRS addrmode5:$a))>;
+def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm",
+ []> {
+ let Inst{7} = 1; // s32
+}
+
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
@@ -1075,6 +1322,13 @@ def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VUITOS (VLDRS addrmode5:$a))>;
+def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm",
+ []> {
+ let Inst{7} = 0; // u32
+}
+
// FP -> Int:
class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -1113,6 +1367,25 @@ class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{22} = Sd{0};
}
+class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Predicates = [HasFullFP16];
+}
+
// Always set Z bit in the instruction, i.e. "round towards zero" variants.
def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
@@ -1147,6 +1420,13 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
addrmode5:$ptr),
(VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
+def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm",
+ []> {
+ let Inst{7} = 1; // Z bit
+}
+
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
@@ -1180,6 +1460,13 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
addrmode5:$ptr),
(VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
+def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm",
+ []> {
+ let Inst{7} = 1; // Z bit
+}
+
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
let Uses = [FPSCR] in {
// FIXME: Verify encoding after integrated assembler is working.
@@ -1197,6 +1484,13 @@ def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
let Inst{7} = 0; // Z bit
}
+def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm",
+ []> {
+ let Inst{7} = 0; // Z bit
+}
+
def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
@@ -1210,6 +1504,13 @@ def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
[(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
let Inst{7} = 0; // Z bit
}
+
+def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm",
+ []> {
+ let Inst{7} = 0; // Z bit
+}
}
// Convert between floating-point and fixed-point
@@ -1249,6 +1550,26 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
let Predicates = [HasVFP2, HasDPVFP];
}
+def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>,
+ Requires<[HasFullFP16]>;
+
+def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>,
+ Requires<[HasFullFP16]>;
+
+def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>,
+ Requires<[HasFullFP16]>;
+
+def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>,
+ Requires<[HasFullFP16]>;
+
def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
@@ -1299,6 +1620,26 @@ def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
// Fixed-Point to FP:
+def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>,
+ Requires<[HasFullFP16]>;
+
+def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>,
+ Requires<[HasFullFP16]>;
+
+def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>,
+ Requires<[HasFullFP16]>;
+
+def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>,
+ Requires<[HasFullFP16]>;
+
def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
@@ -1373,6 +1714,13 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
let D = VFPNeonA8Domain;
}
+def VMLAH : AHbI<0b11100, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC16, "vmla", ".f16\t$Sd, $Sn, $Sm",
+ []>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1400,6 +1748,13 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
let D = VFPNeonA8Domain;
}
+def VMLSH : AHbI<0b11100, 0b00, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC16, "vmls", ".f16\t$Sd, $Sn, $Sm",
+ []>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1427,6 +1782,13 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
let D = VFPNeonA8Domain;
}
+def VNMLAH : AHbI<0b11100, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC16, "vnmla", ".f16\t$Sd, $Sn, $Sm",
+ []>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1453,6 +1815,13 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
let D = VFPNeonA8Domain;
}
+def VNMLSH : AHbI<0b11100, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC16, "vnmls", ".f16\t$Sd, $Sn, $Sm",
+ []>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1482,6 +1851,13 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
// VFP pipelines.
}
+def VFMAH : AHbI<0b11101, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm",
+ []>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasFullFP16,UseFusedMAC]>;
+
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
@@ -1517,6 +1893,13 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
// VFP pipelines.
}
+def VFMSH : AHbI<0b11101, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm",
+ []>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasFullFP16,UseFusedMAC]>;
+
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
@@ -1559,6 +1942,13 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
// VFP pipelines.
}
+def VFNMAH : AHbI<0b11101, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm",
+ []>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasFullFP16,UseFusedMAC]>;
+
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
@@ -1600,6 +1990,13 @@ def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
// VFP pipelines.
}
+def VFNMSH : AHbI<0b11101, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm",
+ []>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasFullFP16,UseFusedMAC]>;
+
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
@@ -1780,6 +2177,23 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
let Inst{7-4} = 0b0000;
let Inst{3-0} = imm{3-0};
}
+
+def FCONSTH : VFPAI<(outs SPR:$Sd), (ins vfp_f16imm:$imm),
+ VFPMiscFrm, IIC_fpUNA16,
+ "vmov", ".f16\t$Sd, $imm",
+ []>, Requires<[HasFullFP16]> {
+ bits<5> Sd;
+ bits<8> imm;
+
+ let Inst{27-23} = 0b11101;
+ let Inst{22} = Sd{0};
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{7-4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{11-8} = 0b1001; // Half precision
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = imm{3-0};
+}
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 6e7e47b8706a..62d57f3f4986 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -60,9 +60,14 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
-namespace llvm {
-void initializeARMLoadStoreOptPass(PassRegistry &);
-}
+/// This switch disables formation of double/multi instructions that could
+/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
+/// disabled. This can be used to create libraries that are robust even when
+/// users provoke undefined behaviour by supplying misaligned pointers.
+/// \see mayCombineMisaligned()
+static cl::opt<bool>
+AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
+ cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
@@ -71,9 +76,7 @@ namespace {
/// form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARMLoadStoreOpt() : MachineFunctionPass(ID) {
- initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
- }
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
const MachineFunction *MF;
const TargetInstrInfo *TII;
@@ -90,6 +93,11 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
const char *getPassName() const override {
return ARM_LOAD_STORE_OPT_NAME;
}
@@ -101,8 +109,8 @@ namespace {
MachineInstr *MI;
int Offset; ///< Load/Store offset.
unsigned Position; ///< Position as counted from end of basic block.
- MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
- : MI(MI), Offset(Offset), Position(Position) {}
+ MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
+ : MI(&MI), Offset(Offset), Position(Position) {}
};
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
@@ -131,17 +139,19 @@ namespace {
MachineBasicBlock::const_iterator Before);
unsigned findFreeReg(const TargetRegisterClass &RegClass);
void UpdateBaseRegUses(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- DebugLoc DL, unsigned Base, unsigned WordOffset,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ unsigned Base, unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg);
- MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
- bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
- DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
- MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
- bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
- DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
+ MachineInstr *CreateLoadStoreMulti(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
+ ArrayRef<std::pair<unsigned, bool>> Regs);
+ MachineInstr *CreateLoadStoreDouble(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
+ ArrayRef<std::pair<unsigned, bool>> Regs) const;
void FormCandidates(const MemOpQueue &MemOps);
MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
@@ -156,10 +166,11 @@ namespace {
char ARMLoadStoreOpt::ID = 0;
}
-INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
+INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
+ false)
-static bool definesCPSR(const MachineInstr *MI) {
- for (const auto &MO : MI->operands()) {
+static bool definesCPSR(const MachineInstr &MI) {
+ for (const auto &MO : MI.operands()) {
if (!MO.isReg())
continue;
if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
@@ -171,11 +182,11 @@ static bool definesCPSR(const MachineInstr *MI) {
return false;
}
-static int getMemoryOpOffset(const MachineInstr *MI) {
- unsigned Opcode = MI->getOpcode();
+static int getMemoryOpOffset(const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
- unsigned NumOperands = MI->getDesc().getNumOperands();
- unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+ unsigned NumOperands = MI.getDesc().getNumOperands();
+ unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
@@ -436,12 +447,12 @@ static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
-void
-ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- DebugLoc DL, unsigned Base,
- unsigned WordOffset,
- ARMCC::CondCodes Pred, unsigned PredReg) {
+void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned Base,
+ unsigned WordOffset,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg) {
assert(isThumb1 && "Can only update base register uses for Thumb1!");
// Start updating any instructions with immediate offsets. Insert a SUB before
// the first non-updateable instruction (if any).
@@ -475,7 +486,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
InsertSub = true;
} else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
- !definesCPSR(MBBI)) {
+ !definesCPSR(*MBBI)) {
// SUBS/ADDS using this register, with a dead def of the CPSR.
// Merge it with the update; if the merged offset is too large,
// insert a new sub instead.
@@ -499,7 +510,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
InsertSub = true;
}
- } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
+ } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
// Since SUBS sets the condition flags, we can't place the base reset
// after an instruction that has a live CPSR def.
// The base register might also contain an argument for a function call.
@@ -552,7 +563,7 @@ void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
// Initialize if we never queried in this block.
if (!LiveRegsValid) {
LiveRegs.init(TRI);
- LiveRegs.addLiveOuts(&MBB, true);
+ LiveRegs.addLiveOuts(MBB);
LiveRegPos = MBB.end();
LiveRegsValid = true;
}
@@ -574,10 +585,11 @@ static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// true if the transformation is done.
-MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
- bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
- DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
+ ArrayRef<std::pair<unsigned, bool>> Regs) {
unsigned NumRegs = Regs.size();
assert(NumRegs > 1);
@@ -770,10 +782,11 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
return MIB.getInstr();
}
-MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
- bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
- DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
+ ArrayRef<std::pair<unsigned, bool>> Regs) const {
bool IsLoad = isi32Load(Opcode);
assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
@@ -836,11 +849,11 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
iterator InsertBefore = std::next(iterator(LatestMI));
MachineBasicBlock &MBB = *LatestMI->getParent();
- unsigned Offset = getMemoryOpOffset(First);
+ unsigned Offset = getMemoryOpOffset(*First);
unsigned Base = getLoadStoreBaseOp(*First).getReg();
bool BaseKill = LatestMI->killsRegister(Base);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
DebugLoc DL = First->getDebugLoc();
MachineInstr *Merged = nullptr;
if (Cand.CanMergeToLSDouble)
@@ -916,6 +929,24 @@ static bool isValidLSDoubleOffset(int Offset) {
return (Value % 4) == 0 && Value < 1024;
}
+/// Return true for loads/stores that can be combined to a double/multi
+/// operation without increasing the requirements for alignment.
+static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
+ const MachineInstr &MI) {
+ // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
+ // difference.
+ unsigned Opcode = MI.getOpcode();
+ if (!isi32Load(Opcode) && !isi32Store(Opcode))
+ return true;
+
+ // Stack pointer alignment is out of the programmers control so we can trust
+ // SP-relative loads/stores.
+ if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
+ STI.getFrameLowering()->getTransientStackAlignment() >= 4)
+ return true;
+ return false;
+}
+
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
const MachineInstr *FirstMI = MemOps[0].MI;
@@ -946,7 +977,7 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
bool CanMergeToLSMulti = true;
// On swift vldm/vstm starting with an odd register number as that needs
// more uops than single vldrs.
- if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
+ if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
CanMergeToLSMulti = false;
// LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
@@ -954,6 +985,10 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
if (PReg == ARM::SP || PReg == ARM::PC)
CanMergeToLSMulti = CanMergeToLSDouble = false;
+ // Should we be conservative?
+ if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
+ CanMergeToLSMulti = CanMergeToLSDouble = false;
+
// Merge following instructions where possible.
for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
int NewOffset = MemOps[I].Offset;
@@ -1102,11 +1137,11 @@ static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
unsigned MIPredReg;
if (MI.getOperand(0).getReg() != Reg ||
MI.getOperand(1).getReg() != Reg ||
- getInstrPredicate(&MI, MIPredReg) != Pred ||
+ getInstrPredicate(MI, MIPredReg) != Pred ||
MIPredReg != PredReg)
return 0;
- if (CheckCPSRDef && definesCPSR(&MI))
+ if (CheckCPSRDef && definesCPSR(MI))
return 0;
return MI.getOperand(2).getImm() * Scale;
}
@@ -1169,7 +1204,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
unsigned Base = BaseOP.getReg();
bool BaseKill = BaseOP.isKill();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
unsigned Opcode = MI->getOpcode();
DebugLoc DL = MI->getDebugLoc();
@@ -1193,10 +1228,30 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
} else {
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
- ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
- return false;
+ ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
+
+ // We couldn't find an inc/dec to merge. But if the base is dead, we
+ // can still change to a writeback form as that will save us 2 bytes
+ // of code size. It can create WAW hazards though, so only do it if
+ // we're minimizing code size.
+ if (!MBB.getParent()->getFunction()->optForMinSize() || !BaseKill)
+ return false;
+
+ bool HighRegsUsed = false;
+ for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
+ if (MI->getOperand(i).getReg() >= ARM::R8) {
+ HighRegsUsed = true;
+ break;
+ }
+
+ if (!HighRegsUsed)
+ MergeInstr = MBB.end();
+ else
+ return false;
+ }
}
- MBB.erase(MergeInstr);
+ if (MergeInstr != MBB.end())
+ MBB.erase(MergeInstr);
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
@@ -1291,7 +1346,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
return false;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
int Bytes = getLSMultipleTransferSize(MI);
MachineBasicBlock &MBB = *MI->getParent();
MachineBasicBlock::iterator MBBI(MI);
@@ -1388,7 +1443,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
return false;
unsigned PredReg;
- ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
MachineBasicBlock::iterator MBBI(MI);
MachineBasicBlock &MBB = *MI.getParent();
int Offset;
@@ -1487,14 +1542,13 @@ static bool isMemoryOp(const MachineInstr &MI) {
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- int Offset, bool isDef,
- DebugLoc DL, unsigned NewOpc,
+ MachineBasicBlock::iterator &MBBI, int Offset,
+ bool isDef, const DebugLoc &DL, unsigned NewOpc,
unsigned Reg, bool RegDeadKill, bool RegUndef,
unsigned BaseReg, bool BaseKill, bool BaseUndef,
- bool OffKill, bool OffUndef,
- ARMCC::CondCodes Pred, unsigned PredReg,
- const TargetInstrInfo *TII, bool isT2) {
+ bool OffKill, bool OffUndef, ARMCC::CondCodes Pred,
+ unsigned PredReg, const TargetInstrInfo *TII,
+ bool isT2) {
if (isDef) {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
@@ -1547,9 +1601,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
bool BaseUndef = BaseOp.isUndef();
bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
- int OffImm = getMemoryOpOffset(MI);
+ int OffImm = getMemoryOpOffset(*MI);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
if (OddRegNum > EvenRegNum && OffImm == 0) {
// Ascending register numbers and no offset. It's safe to change it to a
@@ -1655,14 +1709,14 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
unsigned Reg = MO.getReg();
unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
- int Offset = getMemoryOpOffset(MBBI);
+ ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
+ int Offset = getMemoryOpOffset(*MBBI);
if (CurrBase == 0) {
// Start of a new chain.
CurrBase = Base;
CurrOpc = Opcode;
CurrPred = Pred;
- MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
+ MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
continue;
}
// Note: No need to match PredReg in the next if.
@@ -1690,7 +1744,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (!Overlap) {
// Check offset and sort memory operation into the current chain.
if (Offset > MemOps.back().Offset) {
- MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
+ MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
continue;
} else {
MemOpQueue::iterator MI, ME;
@@ -1706,7 +1760,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
}
}
if (MI != MemOps.end()) {
- MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
+ MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
continue;
}
}
@@ -1723,7 +1777,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
MBBI->getOpcode() == ARM::t2STRDi8) {
// ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
// remember them because we may still be able to merge add/sub into them.
- MergeBaseCandidates.push_back(MBBI);
+ MergeBaseCandidates.push_back(&*MBBI);
}
@@ -1805,20 +1859,20 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
// Ignore any DBG_VALUE instructions.
while (PrevI->isDebugValue() && PrevI != MBB.begin())
--PrevI;
- MachineInstr *PrevMI = PrevI;
- unsigned Opcode = PrevMI->getOpcode();
+ MachineInstr &PrevMI = *PrevI;
+ unsigned Opcode = PrevMI.getOpcode();
if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
- MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+ MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
if (MO.getReg() != ARM::LR)
return false;
unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
- PrevMI->setDesc(TII->get(NewOpc));
+ PrevMI.setDesc(TII->get(NewOpc));
MO.setReg(ARM::PC);
- PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
+ PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
MBB.erase(MBBI);
return true;
}
@@ -1840,8 +1894,8 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
for (auto Use : Prev->uses())
if (Use.isKill()) {
AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
- .addReg(Use.getReg(), RegState::Kill))
- .copyImplicitOps(&*MBBI);
+ .addReg(Use.getReg(), RegState::Kill))
+ .copyImplicitOps(*MBBI);
MBB.erase(MBBI);
MBB.erase(Prev);
return true;
@@ -1851,6 +1905,9 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(*Fn.getFunction()))
+ return false;
+
MF = &Fn;
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
TL = STI->getTargetLowering();
@@ -1877,10 +1934,6 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
return Modified;
}
-namespace llvm {
-void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
-}
-
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
"ARM pre- register allocation load / store optimization pass"
@@ -1889,9 +1942,7 @@ namespace {
/// locations close to make it more likely they will be combined later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
- ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
- initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
- }
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
const DataLayout *TD;
const TargetInstrInfo *TII;
@@ -1922,10 +1973,13 @@ namespace {
char ARMPreAllocLoadStoreOpt::ID = 0;
}
-INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
+INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ if (AssumeMisalignedLoadStores || skipFunction(*Fn.getFunction()))
+ return false;
+
TD = &Fn.getDataLayout();
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
TII = STI->getInstrInfo();
@@ -2034,7 +2088,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
return false;
// Then make sure the immediate offset fits.
- int OffImm = getMemoryOpOffset(Op0);
+ int OffImm = getMemoryOpOffset(*Op0);
if (isT2) {
int Limit = (1 << 8) * Scale;
if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
@@ -2056,7 +2110,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
if (FirstReg == SecondReg)
return false;
BaseReg = Op0->getOperand(1).getReg();
- Pred = getInstrPredicate(Op0, PredReg);
+ Pred = getInstrPredicate(*Op0, PredReg);
dl = Op0->getDebugLoc();
return true;
}
@@ -2070,11 +2124,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
// Sort by offset (in reverse order).
std::sort(Ops.begin(), Ops.end(),
[](const MachineInstr *LHS, const MachineInstr *RHS) {
- int LOffset = getMemoryOpOffset(LHS);
- int ROffset = getMemoryOpOffset(RHS);
- assert(LHS == RHS || LOffset != ROffset);
- return LOffset > ROffset;
- });
+ int LOffset = getMemoryOpOffset(*LHS);
+ int ROffset = getMemoryOpOffset(*RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ });
// The loads / stores of the same base are in order. Scan them from first to
// last and check for the following:
@@ -2106,7 +2160,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (LastOpcode && LSMOpcode != LastOpcode)
break;
- int Offset = getMemoryOpOffset(Op);
+ int Offset = getMemoryOpOffset(*Op);
unsigned Bytes = getLSMultipleTransferSize(Op);
if (LastBytes) {
if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
@@ -2141,8 +2195,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
} else {
// This is the new location for the loads / stores.
MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
- while (InsertPos != MBB->end()
- && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
+ while (InsertPos != MBB->end() &&
+ (MemOps.count(&*InsertPos) || InsertPos->isDebugValue()))
++InsertPos;
// If we are moving a pair of loads / stores, see if it makes sense
@@ -2237,25 +2291,25 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator E = MBB->end();
while (MBBI != E) {
for (; MBBI != E; ++MBBI) {
- MachineInstr *MI = MBBI;
- if (MI->isCall() || MI->isTerminator()) {
+ MachineInstr &MI = *MBBI;
+ if (MI.isCall() || MI.isTerminator()) {
// Stop at barriers.
++MBBI;
break;
}
- if (!MI->isDebugValue())
- MI2LocMap[MI] = ++Loc;
+ if (!MI.isDebugValue())
+ MI2LocMap[&MI] = ++Loc;
- if (!isMemoryOp(*MI))
+ if (!isMemoryOp(MI))
continue;
unsigned PredReg = 0;
if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
continue;
- int Opc = MI->getOpcode();
+ int Opc = MI.getOpcode();
bool isLd = isLoadSingle(Opc);
- unsigned Base = MI->getOperand(1).getReg();
+ unsigned Base = MI.getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);
bool StopHere = false;
@@ -2264,15 +2318,15 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
Base2LdsMap.find(Base);
if (BI != Base2LdsMap.end()) {
for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
- if (Offset == getMemoryOpOffset(BI->second[i])) {
+ if (Offset == getMemoryOpOffset(*BI->second[i])) {
StopHere = true;
break;
}
}
if (!StopHere)
- BI->second.push_back(MI);
+ BI->second.push_back(&MI);
} else {
- Base2LdsMap[Base].push_back(MI);
+ Base2LdsMap[Base].push_back(&MI);
LdBases.push_back(Base);
}
} else {
@@ -2280,15 +2334,15 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
Base2StsMap.find(Base);
if (BI != Base2StsMap.end()) {
for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
- if (Offset == getMemoryOpOffset(BI->second[i])) {
+ if (Offset == getMemoryOpOffset(*BI->second[i])) {
StopHere = true;
break;
}
}
if (!StopHere)
- BI->second.push_back(MI);
+ BI->second.push_back(&MI);
} else {
- Base2StsMap[Base].push_back(MI);
+ Base2StsMap[Base].push_back(&MI);
StBases.push_back(Base);
}
}
@@ -2335,4 +2389,3 @@ FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
return new ARMPreAllocLoadStoreOpt();
return new ARMLoadStoreOpt();
}
-
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index a2aca2d1a69e..7429acdb09ad 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -26,33 +26,22 @@ using namespace llvm;
MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
const MCSymbol *Symbol) {
- const MCExpr *Expr;
- unsigned Option = MO.getTargetFlags() & ARMII::MO_OPTION_MASK;
- switch (Option) {
- default: {
- Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
- OutContext);
- switch (Option) {
- default: llvm_unreachable("Unknown target flag on symbol operand");
- case ARMII::MO_NO_FLAG:
- break;
- case ARMII::MO_LO16:
- Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
- OutContext);
- Expr = ARMMCExpr::createLower16(Expr, OutContext);
- break;
- case ARMII::MO_HI16:
- Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
- OutContext);
- Expr = ARMMCExpr::createUpper16(Expr, OutContext);
- break;
- }
+ const MCExpr *Expr =
+ MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ switch (MO.getTargetFlags() & ARMII::MO_OPTION_MASK) {
+ default:
+ llvm_unreachable("Unknown target flag on symbol operand");
+ case ARMII::MO_NO_FLAG:
break;
- }
-
- case ARMII::MO_PLT:
- Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_PLT,
- OutContext);
+ case ARMII::MO_LO16:
+ Expr =
+ MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ Expr = ARMMCExpr::createLower16(Expr, OutContext);
+ break;
+ case ARMII::MO_HI16:
+ Expr =
+ MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ Expr = ARMMCExpr::createUpper16(Expr, OutContext);
break;
}
@@ -89,7 +78,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
break;
}
case MachineOperand::MO_ExternalSymbol:
- MCOp = GetSymbolRef(MO,
+ MCOp = GetSymbolRef(MO,
GetExternalSymbolSymbol(MO.getSymbolName()));
break;
case MachineOperand::MO_JumpTableIndex:
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index 71ad7a4a732a..b6dee9ff8385 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -21,4 +21,4 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
- IsSplitCSR(false) {}
+ ArgumentStackSize(0), IsSplitCSR(false) {}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 68f9aec8cae5..f71497240ff3 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -15,7 +15,6 @@
#define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
#include "ARMSubtarget.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 30baf4263c11..73dcb9641b61 100644
--- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -27,6 +27,11 @@ public:
bool runOnMachineFunction(MachineFunction &Fn) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
const char *getPassName() const override {
return "optimise barriers pass";
}
@@ -46,6 +51,9 @@ static bool CanMovePastDMB(const MachineInstr *MI) {
}
bool ARMOptimizeBarriersPass::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
// Vector to store the DMBs we will remove after the first iteration
std::vector<MachineInstr *> ToRemove;
// DMBType is the Imm value of the first operand. It determines whether it's a
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 528c4ec73781..47a99313025c 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -94,7 +94,7 @@ def : PredicateProlog<[{
(void)TII;
}]>;
-def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(MI)}]>;
+def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>;
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
@@ -186,38 +186,50 @@ def IIC_iStore_mu : InstrItinClass;
def IIC_Preload : InstrItinClass;
def IIC_Br : InstrItinClass;
def IIC_fpSTAT : InstrItinClass;
+def IIC_fpUNA16 : InstrItinClass;
def IIC_fpUNA32 : InstrItinClass;
def IIC_fpUNA64 : InstrItinClass;
+def IIC_fpCMP16 : InstrItinClass;
def IIC_fpCMP32 : InstrItinClass;
def IIC_fpCMP64 : InstrItinClass;
def IIC_fpCVTSD : InstrItinClass;
def IIC_fpCVTDS : InstrItinClass;
def IIC_fpCVTSH : InstrItinClass;
def IIC_fpCVTHS : InstrItinClass;
+def IIC_fpCVTIH : InstrItinClass;
def IIC_fpCVTIS : InstrItinClass;
def IIC_fpCVTID : InstrItinClass;
+def IIC_fpCVTHI : InstrItinClass;
def IIC_fpCVTSI : InstrItinClass;
def IIC_fpCVTDI : InstrItinClass;
def IIC_fpMOVIS : InstrItinClass;
def IIC_fpMOVID : InstrItinClass;
def IIC_fpMOVSI : InstrItinClass;
def IIC_fpMOVDI : InstrItinClass;
+def IIC_fpALU16 : InstrItinClass;
def IIC_fpALU32 : InstrItinClass;
def IIC_fpALU64 : InstrItinClass;
+def IIC_fpMUL16 : InstrItinClass;
def IIC_fpMUL32 : InstrItinClass;
def IIC_fpMUL64 : InstrItinClass;
+def IIC_fpMAC16 : InstrItinClass;
def IIC_fpMAC32 : InstrItinClass;
def IIC_fpMAC64 : InstrItinClass;
+def IIC_fpFMAC16 : InstrItinClass;
def IIC_fpFMAC32 : InstrItinClass;
def IIC_fpFMAC64 : InstrItinClass;
+def IIC_fpDIV16 : InstrItinClass;
def IIC_fpDIV32 : InstrItinClass;
def IIC_fpDIV64 : InstrItinClass;
+def IIC_fpSQRT16 : InstrItinClass;
def IIC_fpSQRT32 : InstrItinClass;
def IIC_fpSQRT64 : InstrItinClass;
+def IIC_fpLoad16 : InstrItinClass;
def IIC_fpLoad32 : InstrItinClass;
def IIC_fpLoad64 : InstrItinClass;
def IIC_fpLoad_m : InstrItinClass;
def IIC_fpLoad_mu : InstrItinClass;
+def IIC_fpStore16 : InstrItinClass;
def IIC_fpStore32 : InstrItinClass;
def IIC_fpStore64 : InstrItinClass;
def IIC_fpStore_m : InstrItinClass;
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 2c6382542ab9..ba380cba100f 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -1065,11 +1065,11 @@ def CortexA8Itineraries : ProcessorItineraries<
// Cortex-A8 machine model for scheduling and other instruction cost heuristics.
def CortexA8Model : SchedMachineModel {
let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
- let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
let LoadLatency = 2; // Optimistic load latency assuming bypass.
// This is overriden by OperandCycles if the
// Itineraries are queried instead.
let MispredictPenalty = 13; // Based on estimate of pipeline depth.
+ let CompleteModel = 0;
let Itineraries = CortexA8Itineraries;
}
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 9a1d22275646..519e595bd184 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -2025,12 +2025,12 @@ def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
// Define a predicate to select the LDM based on number of memory addresses.
def A9LMAdr#NumAddr#Pred :
- SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>;
+ SchedPredicate<"(TII->getNumLDMAddresses(*MI)+1)/2 == "#NumAddr>;
} // foreach NumAddr
// Fall-back for unknown LDMs.
-def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">;
+def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == 0">;
// LDM/VLDM/VLDn address generation latency & resources.
// Dynamically select the A9WriteAdrN sequence using a predicate.
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index 3ad7730228e5..ea2bf4b578f0 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -374,7 +374,7 @@ let SchedModel = SwiftModel in {
}
// Predicate.
foreach NumAddr = 1-16 in {
- def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
+ def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NumAddr>;
}
def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; }
def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>;
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 6fded9c8ab73..3b99762f7157 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -21,12 +21,9 @@ using namespace llvm;
// Emit, if possible, a specialized version of the given Libcall. Typically this
// means selecting the appropriately aligned version, but we also convert memset
// of 0 into memclr.
-SDValue ARMSelectionDAGInfo::
-EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- RTLIB::Libcall LC) const {
+SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
+ SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
const ARMSubtarget &Subtarget =
DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
@@ -121,21 +118,17 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
TLI->getPointerTy(DAG.getDataLayout())),
- std::move(Args), 0)
+ std::move(Args))
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
-SDValue
-ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) const {
+SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
+ SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
const ARMSubtarget &Subtarget =
DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
// Do repeated 4-byte loads and stores. To be improved.
@@ -176,6 +169,12 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
// emit.
unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
+ // Code size optimisation: do not inline memcpy if expansion results in
+  // more instructions than the library call.
+ if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) {
+ return SDValue();
+ }
+
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
for (unsigned I = 0; I != NumMEMCPYs; ++I) {
@@ -213,8 +212,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, dl, MVT::i32)),
- SrcPtrInfo.getWithOffset(SrcOff),
- false, false, false, 0);
+ SrcPtrInfo.getWithOffset(SrcOff));
TFOps[i] = Loads[i].getValue(1);
++i;
SrcOff += VTSize;
@@ -237,7 +235,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
DAG.getConstant(DstOff, dl, MVT::i32)),
- DstPtrInfo.getWithOffset(DstOff), false, false, 0);
+ DstPtrInfo.getWithOffset(DstOff));
++i;
DstOff += VTSize;
BytesLeft -= VTSize;
@@ -246,26 +244,18 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
makeArrayRef(TFOps, i));
}
-
-SDValue ARMSelectionDAGInfo::
-EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile,
- MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) const {
+SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
+ SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
RTLIB::MEMMOVE);
}
-
-SDValue ARMSelectionDAGInfo::
-EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain, SDValue Dst,
- SDValue Src, SDValue Size,
- unsigned Align, bool isVolatile,
- MachinePointerInfo DstPtrInfo) const {
+SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
+ SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
RTLIB::MEMSET);
}
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 289879ee1d7e..2ddb42c95397 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the ARM subclass for TargetSelectionDAGInfo.
+// This file defines the ARM subclass for SelectionDAGTargetInfo.
//
//===----------------------------------------------------------------------===//
@@ -15,7 +15,8 @@
#define LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
namespace llvm {
@@ -35,35 +36,30 @@ namespace ARM_AM {
}
} // end namespace ARM_AM
-class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
+class ARMSelectionDAGInfo : public SelectionDAGTargetInfo {
public:
-
- SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool isVolatile,
+ bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
- SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile,
- MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) const override;
+ SDValue
+ EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
+ SDValue Dst, SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
// Adjust parameters for memset, see RTABI section 4.3.4
- SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain,
- SDValue Op1, SDValue Op2,
- SDValue Op3, unsigned Align,
- bool isVolatile,
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const override;
- SDValue EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
+ SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
RTLIB::Libcall LC) const;
};
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index bb6ae28065bd..1d7eef9ddcfd 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -88,10 +88,9 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle)
- : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
- ARMProcClass(None), ARMArch(ARMv4t), stackAlignment(4), CPUString(CPU),
- IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
- FrameLowering(initializeFrameLowering(CPU, FS)),
+ : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
+ CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
+ TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
InstrInfo(isThumb1Only()
@@ -102,63 +101,10 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
TLInfo(TM, *this) {}
void ARMSubtarget::initializeEnvironment() {
- HasV4TOps = false;
- HasV5TOps = false;
- HasV5TEOps = false;
- HasV6Ops = false;
- HasV6MOps = false;
- HasV6KOps = false;
- HasV6T2Ops = false;
- HasV7Ops = false;
- HasV8Ops = false;
- HasV8_1aOps = false;
- HasV8_2aOps = false;
- HasVFPv2 = false;
- HasVFPv3 = false;
- HasVFPv4 = false;
- HasFPARMv8 = false;
- HasNEON = false;
- UseNEONForSinglePrecisionFP = false;
- UseMulOps = UseFusedMulOps;
- SlowFPVMLx = false;
- HasVMLxForwarding = false;
- SlowFPBrcc = false;
- InThumbMode = false;
- UseSoftFloat = false;
- HasThumb2 = false;
- NoARM = false;
- ReserveR9 = false;
- NoMovt = false;
- SupportsTailCall = false;
- HasFP16 = false;
- HasFullFP16 = false;
- HasD16 = false;
- HasHardwareDivide = false;
- HasHardwareDivideInARM = false;
- HasT2ExtractPack = false;
- HasDataBarrier = false;
- Pref32BitThumb = false;
- AvoidCPSRPartialUpdate = false;
- AvoidMOVsShifterOperand = false;
- HasRAS = false;
- HasMPExtension = false;
- HasVirtualization = false;
- FPOnlySP = false;
- HasPerfMon = false;
- HasTrustZone = false;
- HasCrypto = false;
- HasCRC = false;
- HasZeroCycleZeroing = false;
- StrictAlign = false;
- HasDSP = false;
- UseNaClTrap = false;
- GenLongCalls = false;
- UnsafeFPMath = false;
-
// MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
// directly from it, but we can try to make sure they're consistent when both
// available.
- UseSjLjEH = isTargetDarwin() && !isTargetWatchOS();
+ UseSjLjEH = isTargetDarwin() && !isTargetWatchABI();
assert((!TM.getMCAsmInfo() ||
(TM.getMCAsmInfo()->getExceptionHandlingType() ==
ExceptionHandling::SjLj) == UseSjLjEH) &&
@@ -230,7 +176,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// registers are the 4 used for parameters. We don't currently do this
// case.
- SupportsTailCall = !isThumb1Only();
+ SupportsTailCall = !isThumb() || hasV8MBaselineOps();
if (isTargetMachO() && isTargetIOS() && getTargetTriple().isOSVersionLT(5, 0))
SupportsTailCall = false;
@@ -252,6 +198,53 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
(Options.UnsafeFPMath || isTargetDarwin()))
UseNEONForSinglePrecisionFP = true;
+
+ // FIXME: Teach TableGen to deal with these instead of doing it manually here.
+ switch (ARMProcFamily) {
+ case Others:
+ case CortexA5:
+ break;
+ case CortexA7:
+ LdStMultipleTiming = DoubleIssue;
+ break;
+ case CortexA8:
+ LdStMultipleTiming = DoubleIssue;
+ break;
+ case CortexA9:
+ LdStMultipleTiming = DoubleIssueCheckUnalignedAccess;
+ PreISelOperandLatencyAdjustment = 1;
+ break;
+ case CortexA12:
+ break;
+ case CortexA15:
+ MaxInterleaveFactor = 2;
+ PreISelOperandLatencyAdjustment = 1;
+ PartialUpdateClearance = 12;
+ break;
+ case CortexA17:
+ case CortexA32:
+ case CortexA35:
+ case CortexA53:
+ case CortexA57:
+ case CortexA72:
+ case CortexA73:
+ case CortexR4:
+ case CortexR4F:
+ case CortexR5:
+ case CortexR7:
+ case CortexM3:
+ case ExynosM1:
+ break;
+ case Krait:
+ PreISelOperandLatencyAdjustment = 1;
+ break;
+ case Swift:
+ MaxInterleaveFactor = 2;
+ LdStMultipleTiming = SingleIssuePlusExtras;
+ PreISelOperandLatencyAdjustment = 1;
+ PartialUpdateClearance = 12;
+ break;
+ }
}
bool ARMSubtarget::isAPCS_ABI() const {
@@ -268,40 +261,16 @@ bool ARMSubtarget::isAAPCS16_ABI() const {
return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
}
+bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
+ if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ return true;
-/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
-bool
-ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
- Reloc::Model RelocM) const {
- if (RelocM == Reloc::Static)
- return false;
-
- bool isDef = GV->isStrongDefinitionForLinker();
-
- if (!isTargetMachO()) {
- // Extra load is needed for all externally visible.
- if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
- return false;
+ // 32 bit macho has no relocation for a-b if a is undefined, even if b is in
+ // the section that is being relocated. This means we have to use o load even
+ // for GVs that are known to be local to the dso.
+ if (isTargetDarwin() && TM.isPositionIndependent() &&
+ (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
return true;
- } else {
- // If this is a strong reference to a definition, it is definitely not
- // through a stub.
- if (isDef)
- return false;
-
- // Unless we have a symbol with hidden visibility, we have to go through a
- // normal $non_lazy_ptr stub because this symbol might be resolved late.
- if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
- return true;
-
- if (RelocM == Reloc::PIC_) {
- // If symbol visibility is hidden, we have a stub for common symbol
- // references and external declarations.
- if (GV->isDeclarationForLinker() || GV->hasCommonLinkage())
- // Hidden $non_lazy_ptr reference.
- return true;
- }
- }
return false;
}
@@ -332,21 +301,21 @@ bool ARMSubtarget::enablePostRAScheduler() const {
}
bool ARMSubtarget::enableAtomicExpand() const {
- return hasAnyDataBarrier() && !isThumb1Only();
+ return hasAnyDataBarrier() && (!isThumb() || hasV8MBaselineOps());
}
bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
// For general targets, the prologue can grow when VFPs are allocated with
// stride 4 (more vpush instructions). But WatchOS uses a compact unwind
// format which it's more important to get right.
- return isTargetWatchOS() || (isSwift() && !MF.getFunction()->optForMinSize());
+ return isTargetWatchABI() || (isSwift() && !MF.getFunction()->optForMinSize());
}
bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
// NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
// immediates as it is inherently position independent, and may be out of
// range otherwise.
- return !NoMovt && hasV6T2Ops() &&
+ return !NoMovt && hasV8MBaselineOps() &&
(isTargetWindows() || !MF.getFunction()->optForMinSize());
}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 4d54e5751473..910de0e1e72d 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -43,8 +43,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexA35, CortexA53,
- CortexA57, CortexA72, Krait, Swift, ExynosM1
+ CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexM3,
+ CortexA32, CortexA35, CortexA53, CortexA57, CortexA72, CortexA73,
+ Krait, Swift, ExynosM1
};
enum ARMProcClassEnum {
None, AClass, RClass, MClass
@@ -52,188 +53,275 @@ protected:
enum ARMArchEnum {
ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te,
ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r,
- ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a
+ ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline
};
+public:
+ /// What kind of timing do load multiple/store multiple instructions have.
+ enum ARMLdStMultipleTiming {
+ /// Can load/store 2 registers/cycle.
+ DoubleIssue,
+ /// Can load/store 2 registers/cycle, but needs an extra cycle if the access
+ /// is not 64-bit aligned.
+ DoubleIssueCheckUnalignedAccess,
+ /// Can load/store 1 register/cycle.
+ SingleIssue,
+ /// Can load/store 1 register/cycle, but needs an extra cycle for address
+ /// computation and potentially also for register writeback.
+ SingleIssuePlusExtras,
+ };
+
+protected:
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
- ARMProcFamilyEnum ARMProcFamily;
+ ARMProcFamilyEnum ARMProcFamily = Others;
/// ARMProcClass - ARM processor class: None, AClass, RClass or MClass.
- ARMProcClassEnum ARMProcClass;
+ ARMProcClassEnum ARMProcClass = None;
/// ARMArch - ARM architecture
- ARMArchEnum ARMArch;
+ ARMArchEnum ARMArch = ARMv4t;
/// HasV4TOps, HasV5TOps, HasV5TEOps,
/// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
/// Specify whether target support specific ARM ISA variants.
- bool HasV4TOps;
- bool HasV5TOps;
- bool HasV5TEOps;
- bool HasV6Ops;
- bool HasV6MOps;
- bool HasV6KOps;
- bool HasV6T2Ops;
- bool HasV7Ops;
- bool HasV8Ops;
- bool HasV8_1aOps;
- bool HasV8_2aOps;
+ bool HasV4TOps = false;
+ bool HasV5TOps = false;
+ bool HasV5TEOps = false;
+ bool HasV6Ops = false;
+ bool HasV6MOps = false;
+ bool HasV6KOps = false;
+ bool HasV6T2Ops = false;
+ bool HasV7Ops = false;
+ bool HasV8Ops = false;
+ bool HasV8_1aOps = false;
+ bool HasV8_2aOps = false;
+ bool HasV8MBaselineOps = false;
+ bool HasV8MMainlineOps = false;
/// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
/// floating point ISAs are supported.
- bool HasVFPv2;
- bool HasVFPv3;
- bool HasVFPv4;
- bool HasFPARMv8;
- bool HasNEON;
+ bool HasVFPv2 = false;
+ bool HasVFPv3 = false;
+ bool HasVFPv4 = false;
+ bool HasFPARMv8 = false;
+ bool HasNEON = false;
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
/// specified. Use the method useNEONForSinglePrecisionFP() to
/// determine if NEON should actually be used.
- bool UseNEONForSinglePrecisionFP;
+ bool UseNEONForSinglePrecisionFP = false;
/// UseMulOps - True if non-microcoded fused integer multiply-add and
/// multiply-subtract instructions should be used.
- bool UseMulOps;
+ bool UseMulOps = false;
/// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
/// whether the FP VML[AS] instructions are slow (if so, don't use them).
- bool SlowFPVMLx;
+ bool SlowFPVMLx = false;
/// HasVMLxForwarding - If true, NEON has special multiplier accumulator
/// forwarding to allow mul + mla being issued back to back.
- bool HasVMLxForwarding;
+ bool HasVMLxForwarding = false;
/// SlowFPBrcc - True if floating point compare + branch is slow.
- bool SlowFPBrcc;
+ bool SlowFPBrcc = false;
/// InThumbMode - True if compiling for Thumb, false for ARM.
- bool InThumbMode;
+ bool InThumbMode = false;
/// UseSoftFloat - True if we're using software floating point features.
- bool UseSoftFloat;
+ bool UseSoftFloat = false;
/// HasThumb2 - True if Thumb2 instructions are supported.
- bool HasThumb2;
+ bool HasThumb2 = false;
/// NoARM - True if subtarget does not support ARM mode execution.
- bool NoARM;
+ bool NoARM = false;
/// ReserveR9 - True if R9 is not available as a general purpose register.
- bool ReserveR9;
+ bool ReserveR9 = false;
/// NoMovt - True if MOVT / MOVW pairs are not used for materialization of
/// 32-bit imms (including global addresses).
- bool NoMovt;
+ bool NoMovt = false;
/// SupportsTailCall - True if the OS supports tail call. The dynamic linker
/// must be able to synthesize call stubs for interworking between ARM and
/// Thumb.
- bool SupportsTailCall;
+ bool SupportsTailCall = false;
/// HasFP16 - True if subtarget supports half-precision FP conversions
- bool HasFP16;
+ bool HasFP16 = false;
/// HasFullFP16 - True if subtarget supports half-precision FP operations
- bool HasFullFP16;
+ bool HasFullFP16 = false;
/// HasD16 - True if subtarget is limited to 16 double precision
/// FP registers for VFPv3.
- bool HasD16;
+ bool HasD16 = false;
/// HasHardwareDivide - True if subtarget supports [su]div
- bool HasHardwareDivide;
+ bool HasHardwareDivide = false;
/// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
- bool HasHardwareDivideInARM;
+ bool HasHardwareDivideInARM = false;
/// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
/// instructions.
- bool HasT2ExtractPack;
+ bool HasT2ExtractPack = false;
/// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
/// instructions.
- bool HasDataBarrier;
+ bool HasDataBarrier = false;
+
+ /// HasV7Clrex - True if the subtarget supports CLREX instructions
+ bool HasV7Clrex = false;
+
+ /// HasAcquireRelease - True if the subtarget supports v8 atomics (LDA/LDAEX etc)
+ /// instructions
+ bool HasAcquireRelease = false;
/// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
/// over 16-bit ones.
- bool Pref32BitThumb;
+ bool Pref32BitThumb = false;
/// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions
/// that partially update CPSR and add false dependency on the previous
/// CPSR setting instruction.
- bool AvoidCPSRPartialUpdate;
+ bool AvoidCPSRPartialUpdate = false;
/// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting
/// movs with shifter operand (i.e. asr, lsl, lsr).
- bool AvoidMOVsShifterOperand;
+ bool AvoidMOVsShifterOperand = false;
- /// HasRAS - Some processors perform return stack prediction. CodeGen should
+ /// HasRetAddrStack - Some processors perform return stack prediction. CodeGen should
/// avoid issue "normal" call instructions to callees which do not return.
- bool HasRAS;
+ bool HasRetAddrStack = false;
/// HasMPExtension - True if the subtarget supports Multiprocessing
/// extension (ARMv7 only).
- bool HasMPExtension;
+ bool HasMPExtension = false;
/// HasVirtualization - True if the subtarget supports the Virtualization
/// extension.
- bool HasVirtualization;
+ bool HasVirtualization = false;
/// FPOnlySP - If true, the floating point unit only supports single
/// precision.
- bool FPOnlySP;
+ bool FPOnlySP = false;
/// If true, the processor supports the Performance Monitor Extensions. These
/// include a generic cycle-counter as well as more fine-grained (often
/// implementation-specific) events.
- bool HasPerfMon;
+ bool HasPerfMon = false;
/// HasTrustZone - if true, processor supports TrustZone security extensions
- bool HasTrustZone;
+ bool HasTrustZone = false;
+
+ /// Has8MSecExt - if true, processor supports ARMv8-M Security Extensions
+ bool Has8MSecExt = false;
/// HasCrypto - if true, processor supports Cryptography extensions
- bool HasCrypto;
+ bool HasCrypto = false;
/// HasCRC - if true, processor supports CRC instructions
- bool HasCRC;
+ bool HasCRC = false;
+
+ /// HasRAS - if true, the processor supports RAS extensions
+ bool HasRAS = false;
/// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
/// particularly effective at zeroing a VFP register.
- bool HasZeroCycleZeroing;
+ bool HasZeroCycleZeroing = false;
+
+ /// If true, if conversion may decide to leave some instructions unpredicated.
+ bool IsProfitableToUnpredicate = false;
+
+ /// If true, VMOV will be favored over VGETLNi32.
+ bool HasSlowVGETLNi32 = false;
+
+ /// If true, VMOV will be favored over VDUP.
+ bool HasSlowVDUP32 = false;
+
+ /// If true, VMOVSR will be favored over VMOVDRR.
+ bool PreferVMOVSR = false;
+
+ /// If true, ISHST barriers will be used for Release semantics.
+ bool PreferISHST = false;
+
+ /// If true, a VLDM/VSTM starting with an odd register number is considered to
+ /// take more microops than single VLDRS/VSTRS.
+ bool SlowOddRegister = false;
+
+ /// If true, loading into a D subregister will be penalized.
+ bool SlowLoadDSubregister = false;
+
+ /// If true, the AGU and NEON/FPU units are multiplexed.
+ bool HasMuxedUnits = false;
+
+ /// If true, VMOVS will never be widened to VMOVD
+ bool DontWidenVMOVS = false;
+
+ /// If true, run the MLx expansion pass.
+ bool ExpandMLx = false;
+
+ /// If true, VFP/NEON VMLA/VMLS have special RAW hazards.
+ bool HasVMLxHazards = false;
+
+ /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
+ bool UseNEONForFPMovs = false;
+
+ /// If true, VLDn instructions take an extra cycle for unaligned accesses.
+ bool CheckVLDnAlign = false;
+
+ /// If true, VFP instructions are not pipelined.
+ bool NonpipelinedVFP = false;
/// StrictAlign - If true, the subtarget disallows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsMisalignedMemoryAccesses().
- bool StrictAlign;
+ bool StrictAlign = false;
/// RestrictIT - If true, the subtarget disallows generation of deprecated IT
/// blocks to conform to ARMv8 rule.
- bool RestrictIT;
+ bool RestrictIT = false;
/// HasDSP - If true, the subtarget supports the DSP (saturating arith
/// and such) instructions.
- bool HasDSP;
+ bool HasDSP = false;
/// NaCl TRAP instruction is generated instead of the regular TRAP.
- bool UseNaClTrap;
+ bool UseNaClTrap = false;
/// Generate calls via indirect call instructions.
- bool GenLongCalls;
+ bool GenLongCalls = false;
/// Target machine allowed unsafe FP math (such as use of NEON fp)
- bool UnsafeFPMath;
+ bool UnsafeFPMath = false;
/// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
- bool UseSjLjEH;
+ bool UseSjLjEH = false;
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
- unsigned stackAlignment;
+ unsigned stackAlignment = 4;
/// CPUString - String name of used CPU.
std::string CPUString;
+ unsigned MaxInterleaveFactor = 1;
+
+ /// Clearance before partial register updates (in number of instructions)
+ unsigned PartialUpdateClearance = 0;
+
+ /// What kind of timing do load multiple/store multiple have (double issue,
+ /// single issue etc).
+ ARMLdStMultipleTiming LdStMultipleTiming = SingleIssue;
+
+ /// The adjustment that we need to apply to get the operand latency from the
+ /// operand cycle returned by the itinerary data for pre-ISel operands.
+ int PreISelOperandLatencyAdjustment = 2;
+
/// IsLittle - The target is Little Endian
bool IsLittle;
@@ -313,17 +401,23 @@ public:
bool hasV8Ops() const { return HasV8Ops; }
bool hasV8_1aOps() const { return HasV8_1aOps; }
bool hasV8_2aOps() const { return HasV8_2aOps; }
+ bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
+ bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
+ /// @{
+ /// These functions are obsolete, please consider adding subtarget features
+ /// or properties instead of calling them.
bool isCortexA5() const { return ARMProcFamily == CortexA5; }
bool isCortexA7() const { return ARMProcFamily == CortexA7; }
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
bool isCortexA9() const { return ARMProcFamily == CortexA9; }
bool isCortexA15() const { return ARMProcFamily == CortexA15; }
bool isSwift() const { return ARMProcFamily == Swift; }
- bool isCortexM3() const { return CPUString == "cortex-m3"; }
+ bool isCortexM3() const { return ARMProcFamily == CortexM3; }
bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); }
bool isCortexR5() const { return ARMProcFamily == CortexR5; }
bool isKrait() const { return ARMProcFamily == Krait; }
+ /// @}
bool hasARMOps() const { return !NoARM; }
@@ -334,6 +428,7 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
+ bool hasRAS() const { return HasRAS; }
bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP;
@@ -343,6 +438,8 @@ public:
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
+ bool hasV7Clrex() const { return HasV7Clrex; }
+ bool hasAcquireRelease() const { return HasAcquireRelease; }
bool hasAnyDataBarrier() const {
return HasDataBarrier || (hasV6Ops() && !isThumb());
}
@@ -353,11 +450,26 @@ public:
bool isFPOnlySP() const { return FPOnlySP; }
bool hasPerfMon() const { return HasPerfMon; }
bool hasTrustZone() const { return HasTrustZone; }
+ bool has8MSecExt() const { return Has8MSecExt; }
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+ bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; }
+ bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; }
+ bool hasSlowVDUP32() const { return HasSlowVDUP32; }
+ bool preferVMOVSR() const { return PreferVMOVSR; }
+ bool preferISHSTBarriers() const { return PreferISHST; }
+ bool expandMLx() const { return ExpandMLx; }
+ bool hasVMLxHazards() const { return HasVMLxHazards; }
+ bool hasSlowOddRegister() const { return SlowOddRegister; }
+ bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
+ bool hasMuxedUnits() const { return HasMuxedUnits; }
+ bool dontWidenVMOVS() const { return DontWidenVMOVS; }
+ bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
+ bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
+ bool nonpipelinedVFP() const { return NonpipelinedVFP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
- bool hasRAS() const { return HasRAS; }
+ bool hasRetAddrStack() const { return HasRetAddrStack; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasDSP() const { return HasDSP; }
bool useNaClTrap() const { return UseNaClTrap; }
@@ -373,6 +485,7 @@ public:
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
+ bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
@@ -399,14 +512,21 @@ public:
TargetTriple.getEnvironment() == Triple::GNUEABIHF) &&
!isTargetDarwin() && !isTargetWindows();
}
+ bool isTargetMuslAEABI() const {
+ return (TargetTriple.getEnvironment() == Triple::MuslEABI ||
+ TargetTriple.getEnvironment() == Triple::MuslEABIHF) &&
+ !isTargetDarwin() && !isTargetWindows();
+ }
// ARM Targets that support EHABI exception handling standard
// Darwin uses SjLj. Other targets might need more checks.
bool isTargetEHABICompatible() const {
return (TargetTriple.getEnvironment() == Triple::EABI ||
TargetTriple.getEnvironment() == Triple::GNUEABI ||
+ TargetTriple.getEnvironment() == Triple::MuslEABI ||
TargetTriple.getEnvironment() == Triple::EABIHF ||
TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+ TargetTriple.getEnvironment() == Triple::MuslEABIHF ||
isTargetAndroid()) &&
!isTargetDarwin() && !isTargetWindows();
}
@@ -414,6 +534,7 @@ public:
bool isTargetHardFloat() const {
// FIXME: this is invalid for WindowsCE
return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+ TargetTriple.getEnvironment() == Triple::MuslEABIHF ||
TargetTriple.getEnvironment() == Triple::EABIHF ||
isTargetWindows() || isAAPCS16_ABI();
}
@@ -436,6 +557,13 @@ public:
return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9;
}
+ /// Returns true if the frame setup is split into two separate pushes (first
+ /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent
+ /// to lr.
+ bool splitFramePushPop() const {
+ return isTargetMachO();
+ }
+
bool useStride4VFPs(const MachineFunction &MF) const;
bool useMovt(const MachineFunction &MF) const;
@@ -476,9 +604,20 @@ public:
/// function for this subtarget.
unsigned getStackAlignment() const { return stackAlignment; }
- /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect
- /// symbol.
- bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+ unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
+
+ unsigned getPartialUpdateClearance() const { return PartialUpdateClearance; }
+
+ ARMLdStMultipleTiming getLdStMultipleTiming() const {
+ return LdStMultipleTiming;
+ }
+
+ int getPreISelOperandLatencyAdjustment() const {
+ return PreISelOperandLatencyAdjustment;
+ }
+
+ /// True if the GV will be accessed via an indirect symbol.
+ bool isGVIndirectSymbol(const GlobalValue *GV) const;
/// True if fast-isel is used.
bool useFastISel() const;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index fca1901dc57c..dc730a675bef 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -16,6 +16,7 @@
#include "ARMTargetObjectFile.h"
#include "ARMTargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -53,6 +54,10 @@ extern "C" void LLVMInitializeARMTarget() {
RegisterTargetMachine<ARMBETargetMachine> Y(TheARMBETarget);
RegisterTargetMachine<ThumbLETargetMachine> A(TheThumbLETarget);
RegisterTargetMachine<ThumbBETargetMachine> B(TheThumbBETarget);
+
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeARMLoadStoreOptPass(Registry);
+ initializeARMPreAllocLoadStoreOptPass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -85,7 +90,7 @@ computeTargetABI(const Triple &TT, StringRef CPU,
(TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
CPU.startswith("cortex-m")) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
- } else if (TT.isWatchOS()) {
+ } else if (TT.isWatchABI()) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
} else {
TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
@@ -99,6 +104,8 @@ computeTargetABI(const Triple &TT, StringRef CPU,
case llvm::Triple::Android:
case llvm::Triple::GNUEABI:
case llvm::Triple::GNUEABIHF:
+ case llvm::Triple::MuslEABI:
+ case llvm::Triple::MuslEABIHF:
case llvm::Triple::EABIHF:
case llvm::Triple::EABI:
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
@@ -171,15 +178,30 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
return Ret;
}
-/// TargetMachine ctor - Create an ARM architecture model.
+static Reloc::Model getEffectiveRelocModel(const Triple &TT,
+ Optional<Reloc::Model> RM) {
+ if (!RM.hasValue())
+ // Default relocation model on Darwin is PIC.
+ return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static;
+
+ // DynamicNoPIC is only used on darwin.
+ if (*RM == Reloc::DynamicNoPIC && !TT.isOSDarwin())
+ return Reloc::Static;
+
+ return *RM;
+}
+
+/// Create an ARM architecture model.
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
- CPU, FS, Options, RM, CM, OL),
+ CPU, FS, Options, getEffectiveRelocModel(TT, RM), CM,
+ OL),
TargetABI(computeTargetABI(TT, CPU, Options)),
TLOF(createTLOF(getTargetTriple())),
Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
@@ -192,7 +214,8 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
// Default to triple-appropriate EABI
if (Options.EABIVersion == EABI::Default ||
Options.EABIVersion == EABI::Unknown) {
- if (Subtarget.isTargetGNUAEABI())
+ // musl is compatible with glibc with regard to EABI version
+ if (Subtarget.isTargetGNUAEABI() || Subtarget.isTargetMuslAEABI())
this->Options.EABIVersion = EABI::GNU;
else
this->Options.EABIVersion = EABI::EABI5;
@@ -219,7 +242,6 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
// it as a key for the subtarget since that can be the only difference
// between two functions.
bool SoftFloat =
- F.hasFnAttribute("use-soft-float") &&
F.getFnAttribute("use-soft-float").getValueAsString() == "true";
// If the soft float attribute is set on the function turn on the soft float
// subtarget feature.
@@ -248,8 +270,9 @@ void ARMTargetMachine::anchor() {}
ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool isLittle)
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL,
+ bool isLittle)
: ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
initAsmInfo();
if (!Subtarget.hasARMOps())
@@ -262,7 +285,8 @@ void ARMLETargetMachine::anchor() {}
ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
CodeGenOpt::Level OL)
: ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
@@ -271,7 +295,8 @@ void ARMBETargetMachine::anchor() {}
ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
CodeGenOpt::Level OL)
: ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
@@ -280,7 +305,8 @@ void ThumbTargetMachine::anchor() {}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
: ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
initAsmInfo();
@@ -291,7 +317,8 @@ void ThumbLETargetMachine::anchor() {}
ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
CodeGenOpt::Level OL)
: ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
@@ -300,7 +327,8 @@ void ThumbBETargetMachine::anchor() {}
ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
CodeGenOpt::Level OL)
: ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 8ad1f3dc2c34..c6b70b953162 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -39,7 +39,7 @@ protected:
public:
ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle);
~ARMBaseTargetMachine() override;
@@ -58,39 +58,40 @@ public:
}
};
-/// ARMTargetMachine - ARM target machine.
+/// ARM target machine.
///
class ARMTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
public:
ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
+ StringRef FS, const TargetOptions &Options,
+ Optional<Reloc::Model> RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool isLittle);
};
-/// ARMLETargetMachine - ARM little endian target machine.
+/// ARM little endian target machine.
///
class ARMLETargetMachine : public ARMTargetMachine {
void anchor() override;
public:
ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
-/// ARMBETargetMachine - ARM big endian target machine.
+/// ARM big endian target machine.
///
class ARMBETargetMachine : public ARMTargetMachine {
void anchor() override;
public:
ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
-/// ThumbTargetMachine - Thumb target machine.
+/// Thumb target machine.
/// Due to the way architectures are handled, this represents both
/// Thumb-1 and Thumb-2.
///
@@ -99,29 +100,29 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
public:
ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL,
- bool isLittle);
+ Optional<Reloc::Model> RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool isLittle);
};
-/// ThumbLETargetMachine - Thumb little endian target machine.
+/// Thumb little endian target machine.
///
class ThumbLETargetMachine : public ThumbTargetMachine {
void anchor() override;
public:
ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
-/// ThumbBETargetMachine - Thumb big endian target machine.
+/// Thumb big endian target machine.
///
class ThumbBETargetMachine : public ThumbTargetMachine {
void anchor() override;
public:
ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
+ Optional<Reloc::Model> RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index 98e8763c4705..b1db201cb30d 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -21,10 +21,10 @@ class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
protected:
const MCSection *AttributesSection;
public:
- ARMElfTargetObjectFile() :
- TargetLoweringObjectFileELF(),
- AttributesSection(nullptr)
- {}
+ ARMElfTargetObjectFile()
+ : TargetLoweringObjectFileELF(), AttributesSection(nullptr) {
+ PLTRelativeVariantKind = MCSymbolRefExpr::VK_ARM_PREL31;
+ }
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index c1520119ef21..13c5dc61acd9 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -18,12 +18,12 @@ using namespace llvm;
int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
- unsigned Bits = Ty->getPrimitiveSizeInBits();
- if (Bits == 0 || Bits > 32)
- return 4;
+ unsigned Bits = Ty->getPrimitiveSizeInBits();
+ if (Bits == 0 || Imm.getActiveBits() >= 64)
+ return 4;
- int32_t SImmVal = Imm.getSExtValue();
- uint32_t ZImmVal = Imm.getZExtValue();
+ int64_t SImmVal = Imm.getSExtValue();
+ uint64_t ZImmVal = Imm.getZExtValue();
if (!ST->isThumb()) {
if ((SImmVal >= 0 && SImmVal < 65536) ||
(ARM_AM::getSOImmVal(ZImmVal) != -1) ||
@@ -47,6 +47,32 @@ int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 3;
}
+
+// Constants smaller than 256 fit in the immediate field of
+// Thumb1 instructions so we return a zero cost and 1 otherwise.
+int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
+ return 0;
+
+ return 1;
+}
+
+int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
+ // Division by a constant can be turned into multiplication, but only if we
+ // know it's constant. So it's not so much that the immediate is cheap (it's
+ // not), but that the alternative is worse.
+ // FIXME: this is probably unneeded with GlobalISel.
+ if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
+ Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
+ Idx == 1)
+ return 0;
+
+ return getIntImmCost(Imm, Ty);
+}
+
+
int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -244,10 +270,8 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
unsigned Index) {
// Penalize inserting into an D-subregister. We end up with a three times
// lower estimated throughput on swift.
- if (ST->isSwift() &&
- Opcode == Instruction::InsertElement &&
- ValTy->isVectorTy() &&
- ValTy->getScalarSizeInBits() <= 32)
+ if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
+ ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
return 3;
if ((Opcode == Instruction::InsertElement ||
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 7d8d2381c983..a0ca9e648002 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -54,12 +54,24 @@ public:
bool enableInterleavedAccessVectorization() { return true; }
+ /// Floating-point computation using ARMv8 AArch32 Advanced
+ /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
+ /// is IEEE-754 compliant, but it's not covered in this target.
+ bool isFPVectorizationPotentiallyUnsafe() {
+ return !ST->isTargetDarwin();
+ }
+
/// \name Scalar TTI Implementations
/// @{
+ int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+
using BaseT::getIntImmCost;
int getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+
/// @}
/// \name Vector TTI Implementations
@@ -88,10 +100,7 @@ public:
}
unsigned getMaxInterleaveFactor(unsigned VF) {
- // These are out of order CPUs:
- if (ST->isCortexA15() || ST->isSwift())
- return 2;
- return 1;
+ return ST->getMaxInterleaveFactor();
}
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c69a741244cf..7d49302f9a96 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -20,7 +20,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -31,20 +31,20 @@
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserUtils.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -257,9 +257,15 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasThumb() const {
return getSTI().getFeatureBits()[ARM::HasV4TOps];
}
+ bool hasThumb2() const {
+ return getSTI().getFeatureBits()[ARM::FeatureThumb2];
+ }
bool hasV6Ops() const {
return getSTI().getFeatureBits()[ARM::HasV6Ops];
}
+ bool hasV6T2Ops() const {
+ return getSTI().getFeatureBits()[ARM::HasV6T2Ops];
+ }
bool hasV6MOps() const {
return getSTI().getFeatureBits()[ARM::HasV6MOps];
}
@@ -269,6 +275,15 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasV8Ops() const {
return getSTI().getFeatureBits()[ARM::HasV8Ops];
}
+ bool hasV8MBaseline() const {
+ return getSTI().getFeatureBits()[ARM::HasV8MBaselineOps];
+ }
+ bool hasV8MMainline() const {
+ return getSTI().getFeatureBits()[ARM::HasV8MMainlineOps];
+ }
+ bool has8MSecExt() const {
+ return getSTI().getFeatureBits()[ARM::Feature8MSecExt];
+ }
bool hasARM() const {
return !getSTI().getFeatureBits()[ARM::FeatureNoARM];
}
@@ -281,12 +296,16 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasV8_1aOps() const {
return getSTI().getFeatureBits()[ARM::HasV8_1aOps];
}
+ bool hasRAS() const {
+ return getSTI().getFeatureBits()[ARM::FeatureRAS];
+ }
void SwitchMode() {
MCSubtargetInfo &STI = copySTI();
uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
setAvailableFeatures(FB);
}
+ void FixModeAfterArchChange(bool WasThumb, SMLoc Loc);
bool isMClass() const {
return getSTI().getFeatureBits()[ARM::FeatureMClass];
}
@@ -417,8 +436,9 @@ class ARMOperand : public MCParsedAsmOperand {
k_ShifterImmediate,
k_RotateImmediate,
k_ModifiedImmediate,
+ k_ConstantPoolImmediate,
k_BitfieldDescriptor,
- k_Token
+ k_Token,
} Kind;
SMLoc StartLoc, EndLoc, AlignmentLoc;
@@ -611,6 +631,11 @@ public:
return Imm.Val;
}
+ const MCExpr *getConstantPoolImm() const {
+ assert(isConstantPoolImm() && "Invalid access!");
+ return Imm.Val;
+ }
+
unsigned getVectorIndex() const {
assert(Kind == k_VectorIndex && "Invalid access!");
return VectorIndex.Val;
@@ -648,7 +673,27 @@ public:
bool isCCOut() const { return Kind == k_CCOut; }
bool isITMask() const { return Kind == k_ITCondMask; }
bool isITCondCode() const { return Kind == k_CondCode; }
- bool isImm() const override { return Kind == k_Immediate; }
+ bool isImm() const override {
+ return Kind == k_Immediate;
+ }
+
+ bool isARMBranchTarget() const {
+ if (!isImm()) return false;
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()))
+ return CE->getValue() % 4 == 0;
+ return true;
+ }
+
+
+ bool isThumbBranchTarget() const {
+ if (!isImm()) return false;
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()))
+ return CE->getValue() % 2 == 0;
+ return true;
+ }
+
// checks whether this operand is an unsigned offset which fits is a field
// of specified width and scaled by a specific number of bits
template<unsigned width, unsigned scale>
@@ -1036,6 +1081,7 @@ public:
return ARM_AM::getSOImmVal(Value) == -1 &&
ARM_AM::getSOImmVal(-Value) != -1;
}
+ bool isConstantPoolImm() const { return Kind == k_ConstantPoolImmediate; }
bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
bool isPostIdxReg() const {
@@ -1183,6 +1229,20 @@ public:
return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) ||
Val == INT32_MIN;
}
+ bool isAddrMode5FP16() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+ if (!isMem() || Memory.Alignment != 0) return false;
+ // Check for register offset.
+ if (Memory.OffsetRegNum) return false;
+ // Immediate offset in range [-510, 510] and a multiple of 2.
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val >= -510 && Val <= 510 && ((Val & 1) == 0)) || Val == INT32_MIN;
+ }
bool isMemTBB() const {
if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
@@ -1203,7 +1263,7 @@ public:
}
bool isT2MemRegOffset() const {
if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
- Memory.Alignment != 0)
+ Memory.Alignment != 0 || Memory.BaseRegNum == ARM::PC)
return false;
// Only lsl #{0, 1, 2, 3} allowed.
if (Memory.ShiftType == ARM_AM::no_shift)
@@ -1319,6 +1379,7 @@ public:
// If we have an immediate that's not a constant, treat it as a label
// reference needing a fixup. If it is a constant, it's something else
// and we reject it.
+
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
@@ -1329,6 +1390,11 @@ public:
int64_t Val = Memory.OffsetImm->getValue();
return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
}
+ bool isConstPoolAsmImm() const {
+ // Delay processing of the constant-pool immediate; this will turn into
+ // a constant. Match no other operand.
+ return (isConstantPoolImm());
+ }
bool isPostIdxImm8() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -1665,7 +1731,7 @@ public:
if (!CE) return false;
uint64_t Value = CE->getValue();
// i64 value with each byte being either 0 or 0xff.
- for (unsigned i = 0; i < 8; ++i)
+ for (unsigned i = 0; i < 8; ++i, Value >>= 8)
if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff) return false;
return true;
}
@@ -1680,6 +1746,16 @@ public:
Inst.addOperand(MCOperand::createExpr(Expr));
}
+ void addARMBranchTargetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addThumbBranchTargetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
void addCondCodeOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(unsigned(getCondCode())));
@@ -1941,6 +2017,7 @@ public:
}
const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val);
+
assert(SR && "Unknown value type!");
Inst.addOperand(MCOperand::createExpr(SR));
return;
@@ -2145,6 +2222,28 @@ public:
Inst.addOperand(MCOperand::createImm(Val));
}
+ void addAddrMode5FP16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::createExpr(getImm()));
+ Inst.addOperand(MCOperand::createImm(0));
+ return;
+ }
+
+ // The lower bit is always zero and as such is not encoded.
+ int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 2 : 0;
+ ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
+ // Special case for #-0
+ if (Val == INT32_MIN) Val = 0;
+ if (Val < 0) Val = -Val;
+ Val = ARM_AM::getAM5FP16Opc(AddSub, Val);
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
// If we have an immediate that's not a constant, treat it as a label
@@ -2214,6 +2313,14 @@ public:
Inst.addOperand(MCOperand::createImm(Val));
}
+ void addConstPoolAsmImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // This is a container for the immediate that we will create the constant
+ // pool from.
+ addExpr(Inst, getConstantPoolImm());
+ return;
+ }
+
void addMemTBBOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
@@ -2594,6 +2701,15 @@ public:
}
static std::unique_ptr<ARMOperand>
+ CreateConstantPoolImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ConstantPoolImmediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<ARMOperand>
CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) {
auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor);
Op->Bitfield.LSB = LSB;
@@ -2850,6 +2966,9 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<mod_imm #" << ModImm.Bits << ", #"
<< ModImm.Rot << ")>";
break;
+ case k_ConstantPoolImmediate:
+ OS << "<constant_pool_imm #" << *getConstantPoolImm();
+ break;
case k_BitfieldDescriptor:
OS << "<bitfield " << "lsb: " << Bitfield.LSB
<< ", width: " << Bitfield.Width << ">";
@@ -3969,6 +4088,18 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
.Case("basepri_max", 0x812)
.Case("faultmask", 0x813)
.Case("control", 0x814)
+ .Case("msplim", 0x80a)
+ .Case("psplim", 0x80b)
+ .Case("msp_ns", 0x888)
+ .Case("psp_ns", 0x889)
+ .Case("msplim_ns", 0x88a)
+ .Case("psplim_ns", 0x88b)
+ .Case("primask_ns", 0x890)
+ .Case("basepri_ns", 0x891)
+ .Case("basepri_max_ns", 0x892)
+ .Case("faultmask_ns", 0x893)
+ .Case("control_ns", 0x894)
+ .Case("sp_ns", 0x898)
.Default(~0U);
if (FlagsVal == ~0U)
@@ -3983,6 +4114,14 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
// basepri, basepri_max and faultmask only valid for V7m.
return MatchOperand_NoMatch;
+ if (!has8MSecExt() && (FlagsVal == 0x80a || FlagsVal == 0x80b ||
+ (FlagsVal > 0x814 && FlagsVal < 0xc00)))
+ return MatchOperand_NoMatch;
+
+ if (!hasV8MMainline() && (FlagsVal == 0x88a || FlagsVal == 0x88b ||
+ (FlagsVal > 0x890 && FlagsVal <= 0x893)))
+ return MatchOperand_NoMatch;
+
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
return MatchOperand_Success;
@@ -4673,14 +4812,14 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
// classify tB as either t2B or t1B based on range of immediate operand
case ARM::tB: {
ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]);
- if (!op.isSignedOffset<11, 1>() && isThumbTwo())
+ if (!op.isSignedOffset<11, 1>() && isThumb() && hasV8MBaseline())
Inst.setOpcode(ARM::t2B);
break;
}
// classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand
case ARM::tBcc: {
ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]);
- if (!op.isSignedOffset<8, 1>() && isThumbTwo())
+ if (!op.isSignedOffset<8, 1>() && isThumb() && hasV8MBaseline())
Inst.setOpcode(ARM::t2Bcc);
break;
}
@@ -4973,7 +5112,8 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) {
// vmov.i{8|16|32|64} <dreg|qreg>, #imm
ARMOperand &TyOp = static_cast<ARMOperand &>(*Operands[2]);
bool isVmovf = TyOp.isToken() &&
- (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64");
+ (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64" ||
+ TyOp.getToken() == ".f16");
ARMOperand &Mnemonic = static_cast<ARMOperand &>(*Operands[0]);
bool isFconst = Mnemonic.isToken() && (Mnemonic.getToken() == "fconstd" ||
Mnemonic.getToken() == "fconsts");
@@ -5144,16 +5284,12 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
S = Parser.getTok().getLoc();
if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val)
return Error(S, "unexpected token in operand");
-
Parser.Lex(); // Eat '='
const MCExpr *SubExprVal;
if (getParser().parseExpression(SubExprVal))
return true;
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
-
- const MCExpr *CPLoc =
- getTargetStreamer().addConstantPoolEntry(SubExprVal, S);
- Operands.push_back(ARMOperand::CreateImm(CPLoc, S, E));
+ Operands.push_back(ARMOperand::CreateConstantPoolImm(SubExprVal, S, E));
return false;
}
}
@@ -5265,7 +5401,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" ||
Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" ||
Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
- Mnemonic.startswith("vsel"))
+ Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
+ Mnemonic == "bxns" || Mnemonic == "blxns")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -5311,6 +5448,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" ||
Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" ||
Mnemonic == "vfms" || Mnemonic == "vfnms" || Mnemonic == "fconsts" ||
+ Mnemonic == "bxns" || Mnemonic == "blxns" ||
(Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
@@ -5369,7 +5507,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" ||
Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" ||
Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
- (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) {
+ (FullInst.startswith("vmull") && FullInst.endswith(".p64")) ||
+ Mnemonic == "vmovx" || Mnemonic == "vins") {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {
@@ -6405,6 +6544,20 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"immediate expression for mov requires :lower16: or :upper16");
break;
}
+ case ARM::HINT:
+ case ARM::t2HINT: {
+ if (hasRAS()) {
+ // ESB is not predicable (pred must be AL)
+ unsigned Imm8 = Inst.getOperand(0).getImm();
+ unsigned Pred = Inst.getOperand(1).getImm();
+ if (Imm8 == 0x10 && Pred != ARMCC::AL)
+ return Error(Operands[1]->getStartLoc(), "instruction 'esb' is not "
+ "predicable, but condition "
+ "code specified");
+ }
+ // Without the RAS extension, this behaves as any other unallocated hint.
+ break;
+ }
}
return false;
@@ -6766,6 +6919,90 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
case ARM::t2LDRSHpcrel:
Inst.setOpcode(ARM::t2LDRSHpci);
return true;
+ case ARM::LDRConstPool:
+ case ARM::tLDRConstPool:
+ case ARM::t2LDRConstPool: {
+ // Pseudo instruction ldr rt, =immediate is converted to a
+ // MOV rt, immediate if immediate is known and representable
+ // otherwise we create a constant pool entry that we load from.
+ MCInst TmpInst;
+ if (Inst.getOpcode() == ARM::LDRConstPool)
+ TmpInst.setOpcode(ARM::LDRi12);
+ else if (Inst.getOpcode() == ARM::tLDRConstPool)
+ TmpInst.setOpcode(ARM::tLDRpci);
+ else if (Inst.getOpcode() == ARM::t2LDRConstPool)
+ TmpInst.setOpcode(ARM::t2LDRpci);
+ const ARMOperand &PoolOperand =
+ static_cast<ARMOperand &>(*Operands[3]);
+ const MCExpr *SubExprVal = PoolOperand.getConstantPoolImm();
+ // If SubExprVal is a constant we may be able to use a MOV
+ if (isa<MCConstantExpr>(SubExprVal) &&
+ Inst.getOperand(0).getReg() != ARM::PC &&
+ Inst.getOperand(0).getReg() != ARM::SP) {
+ int64_t Value =
+ (int64_t) (cast<MCConstantExpr>(SubExprVal))->getValue();
+ bool UseMov = true;
+ bool MovHasS = true;
+ if (Inst.getOpcode() == ARM::LDRConstPool) {
+ // ARM Constant
+ if (ARM_AM::getSOImmVal(Value) != -1) {
+ Value = ARM_AM::getSOImmVal(Value);
+ TmpInst.setOpcode(ARM::MOVi);
+ }
+ else if (ARM_AM::getSOImmVal(~Value) != -1) {
+ Value = ARM_AM::getSOImmVal(~Value);
+ TmpInst.setOpcode(ARM::MVNi);
+ }
+ else if (hasV6T2Ops() &&
+ Value >=0 && Value < 65536) {
+ TmpInst.setOpcode(ARM::MOVi16);
+ MovHasS = false;
+ }
+ else
+ UseMov = false;
+ }
+ else {
+ // Thumb/Thumb2 Constant
+ if (hasThumb2() &&
+ ARM_AM::getT2SOImmVal(Value) != -1)
+ TmpInst.setOpcode(ARM::t2MOVi);
+ else if (hasThumb2() &&
+ ARM_AM::getT2SOImmVal(~Value) != -1) {
+ TmpInst.setOpcode(ARM::t2MVNi);
+ Value = ~Value;
+ }
+ else if (hasV8MBaseline() &&
+ Value >=0 && Value < 65536) {
+ TmpInst.setOpcode(ARM::t2MOVi16);
+ MovHasS = false;
+ }
+ else
+ UseMov = false;
+ }
+ if (UseMov) {
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(MCOperand::createImm(Value)); // Immediate
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ if (MovHasS)
+ TmpInst.addOperand(MCOperand::createReg(0)); // S
+ Inst = TmpInst;
+ return true;
+ }
+ }
+ // No opportunity to use MOV/MVN create constant pool
+ const MCExpr *CPLoc =
+ getTargetStreamer().addConstantPoolEntry(SubExprVal,
+ PoolOperand.getStartLoc());
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(MCOperand::createExpr(CPLoc)); // offset to constpool
+ if (TmpInst.getOpcode() == ARM::LDRi12)
+ TmpInst.addOperand(MCOperand::createImm(0)); // unused offset
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
// Handle NEON VST complex aliases.
case ARM::VST1LNdWB_register_Asm_8:
case ARM::VST1LNdWB_register_Asm_16:
@@ -9031,6 +9268,31 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
return false;
}
+// After changing arch/CPU, try to put the ARM/Thumb mode back to what it was
+// before, if supported by the new target, or emit mapping symbols for the mode
+// switch.
+void ARMAsmParser::FixModeAfterArchChange(bool WasThumb, SMLoc Loc) {
+ if (WasThumb != isThumb()) {
+ if (WasThumb && hasThumb()) {
+ // Stay in Thumb mode
+ SwitchMode();
+ } else if (!WasThumb && hasARM()) {
+ // Stay in ARM mode
+ SwitchMode();
+ } else {
+ // Mode switch forced, because the new arch doesn't support the old mode.
+ getParser().getStreamer().EmitAssemblerFlag(isThumb() ? MCAF_Code16
+ : MCAF_Code32);
+      // Warn about the implicit mode switch. GAS does not switch modes here,
+ // but instead stays in the old mode, reporting an error on any following
+ // instructions as the mode does not exist on the target.
+ Warning(Loc, Twine("new target does not support ") +
+ (WasThumb ? "thumb" : "arm") + " mode, switching to " +
+ (!WasThumb ? "thumb" : "arm") + " mode");
+ }
+ }
+}
+
/// parseDirectiveArch
/// ::= .arch token
bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
@@ -9043,10 +9305,12 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
return false;
}
+ bool WasThumb = isThumb();
Triple T;
MCSubtargetInfo &STI = copySTI();
STI.setDefaultFeatures("", ("+" + ARM::getArchName(ID)).str());
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ FixModeAfterArchChange(WasThumb, L);
getTargetStreamer().emitArch(ID);
return false;
@@ -9177,9 +9441,11 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
return false;
}
+ bool WasThumb = isThumb();
MCSubtargetInfo &STI = copySTI();
STI.setDefaultFeatures(CPU, "");
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ FixModeAfterArchChange(WasThumb, L);
return false;
}
@@ -9834,7 +10100,7 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
StringRef Arch = Parser.getTok().getString();
SMLoc ArchLoc = Parser.getTok().getLoc();
- getLexer().Lex();
+ Lex();
unsigned ID = ARM::parseArch(Arch);
@@ -9863,7 +10129,9 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
return true;
// '.align' is target specifically handled to mean 2**2 byte alignment.
- if (getStreamer().getCurrentSection().first->UseCodeAlign())
+ const MCSection *Section = getStreamer().getCurrentSection().first;
+ assert(Section && "must have section to emit alignment");
+ if (Section->UseCodeAlign())
getStreamer().EmitCodeAlignment(4, 0);
else
getStreamer().EmitValueToAlignment(4, 0, 1, 0);
@@ -9933,6 +10201,7 @@ static const struct {
// FIXME: Only available in A-class, isel not predicated
{ ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} },
{ ARM::AEK_FP16, Feature_HasV8_2a, {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} },
+ { ARM::AEK_RAS, Feature_HasV8, {ARM::FeatureRAS} },
// FIXME: Unsupported extensions.
{ ARM::AEK_OS, Feature_None, {} },
{ ARM::AEK_IWMMXT, Feature_None, {} },
@@ -9954,7 +10223,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
StringRef Name = Parser.getTok().getString();
SMLoc ExtLoc = Parser.getTok().getLoc();
- getLexer().Lex();
+ Lex();
bool EnableFeature = true;
if (Name.startswith_lower("no")) {
diff --git a/lib/Target/ARM/AsmParser/Makefile b/lib/Target/ARM/AsmParser/Makefile
deleted file mode 100644
index 841516fffbd5..000000000000
--- a/lib/Target/ARM/AsmParser/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARMAsmParser
-
-# Hack: we need to include 'main' ARM target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index e63defed2288..3196a57ccc3e 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCExpr.h"
@@ -210,6 +210,8 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
@@ -222,6 +224,8 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
@@ -391,8 +395,8 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
static MCDisassembler *createARMDisassembler(const Target &T,
@@ -590,6 +594,8 @@ MCDisassembler::DecodeStatus
ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
MCDisassembler::DecodeStatus S = Success;
+ const FeatureBitset &FeatureBits = getSubtargetInfo().getFeatureBits();
+
// A few instructions actually have predicates encoded in them. Don't
// try to overwrite it if we're seeing one of those.
switch (MI.getOpcode()) {
@@ -610,6 +616,10 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
else
return Success;
break;
+ case ARM::t2HINT:
+ if (MI.getOperand(0).getImm() == 0x10 && (FeatureBits[ARM::FeatureRAS]) != 0)
+ S = SoftFail;
+ break;
case ARM::tB:
case ARM::t2B:
case ARM::t2TBB:
@@ -1941,6 +1951,29 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
return S;
}
+// Check for UNPREDICTABLE predicated ESB instruction
+static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned imm8 = fieldFromInstruction(Insn, 0, 8);
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ const FeatureBitset &FeatureBits = Dis->getSubtargetInfo().getFeatureBits();
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ Inst.addOperand(MCOperand::createImm(imm8));
+
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // ESB is unpredictable if pred != AL. Without the RAS extension, it is a NOP,
+ // so all predicates should be allowed.
+ if (imm8 == 0x10 && pred != 0xe && ((FeatureBits[ARM::FeatureRAS]) != 0))
+ S = MCDisassembler::SoftFail;
+
+ return S;
+}
+
static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned imod = fieldFromInstruction(Insn, 18, 2);
@@ -2183,6 +2216,7 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ // U == 1 to add imm, 0 to subtract it.
unsigned U = fieldFromInstruction(Val, 8, 1);
unsigned imm = fieldFromInstruction(Val, 0, 8);
@@ -2197,6 +2231,26 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
return S;
}
+static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 9, 4);
+ // U == 1 to add imm, 0 to subtract it.
+ unsigned U = fieldFromInstruction(Val, 8, 1);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (U)
+ Inst.addOperand(MCOperand::createImm(ARM_AM::getAM5FP16Opc(ARM_AM::add, imm)));
+ else
+ Inst.addOperand(MCOperand::createImm(ARM_AM::getAM5FP16Opc(ARM_AM::sub, imm)));
+
+ return S;
+}
+
static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
return DecodeGPRRegisterClass(Inst, Val, Address, Decoder);
@@ -4096,6 +4150,24 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
// Values basepri, basepri_max and faultmask are only valid for v7m.
return MCDisassembler::Fail;
break;
+ case 0x8a: // msplim_ns
+ case 0x8b: // psplim_ns
+ case 0x91: // basepri_ns
+ case 0x92: // basepri_max_ns
+ case 0x93: // faultmask_ns
+ if (!(FeatureBits[ARM::HasV8MMainlineOps]))
+ return MCDisassembler::Fail;
+ // fall through
+ case 10: // msplim
+ case 11: // psplim
+ case 0x88: // msp_ns
+ case 0x89: // psp_ns
+ case 0x90: // primask_ns
+ case 0x94: // control_ns
+ case 0x98: // sp_ns
+ if (!(FeatureBits[ARM::Feature8MSecExt]))
+ return MCDisassembler::Fail;
+ break;
default:
return MCDisassembler::Fail;
}
@@ -5193,8 +5265,8 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -5210,12 +5282,30 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
if (Rt == Rt2)
S = MCDisassembler::SoftFail;
+ // We have to check if the instruction is MRRC2
+ // or MCRR2 when constructing the operands for
+ // Inst. Reason is because MRRC2 stores to two
+  // registers so its tablegen desc has two
+  // outputs whereas MCRR2 doesn't store to any
+  // registers so all of its operands are listed
+ // as inputs, therefore the operand order for
+ // MRRC2 needs to be [Rt, Rt2, cop, opc1, CRm]
+ // and MCRR2 operand order is [cop, opc1, Rt, Rt2, CRm]
+
+ if (Inst.getOpcode() == ARM::MRRC2) {
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
Inst.addOperand(MCOperand::createImm(cop));
Inst.addOperand(MCOperand::createImm(opc1));
- if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder)))
- return MCDisassembler::Fail;
+ if (Inst.getOpcode() == ARM::MCRR2) {
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
Inst.addOperand(MCOperand::createImm(CRm));
return S;
diff --git a/lib/Target/ARM/Disassembler/Makefile b/lib/Target/ARM/Disassembler/Makefile
deleted file mode 100644
index 031b6aca5a48..000000000000
--- a/lib/Target/ARM/Disassembler/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/ARM/Disassembler/Makefile ----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARMDisassembler
-
-# Hack: we need to include 'main' arm target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 33fc85af9b19..e81bb77dbdfc 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -25,6 +25,7 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
+#define PRINT_ALIAS_INSTR
#include "ARMGenAsmWriter.inc"
/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
@@ -73,43 +74,6 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
switch (Opcode) {
- // Check for HINT instructions w/ canonical names.
- case ARM::HINT:
- case ARM::tHINT:
- case ARM::t2HINT:
- switch (MI->getOperand(0).getImm()) {
- case 0:
- O << "\tnop";
- break;
- case 1:
- O << "\tyield";
- break;
- case 2:
- O << "\twfe";
- break;
- case 3:
- O << "\twfi";
- break;
- case 4:
- O << "\tsev";
- break;
- case 5:
- if (STI.getFeatureBits()[ARM::HasV8Ops]) {
- O << "\tsevl";
- break;
- } // Fallthrough for non-v8
- default:
- // Anything else should just print normally.
- printInstruction(MI, STI, O);
- printAnnotation(O, Annot);
- return;
- }
- printPredicateOperand(MI, 1, STI, O);
- if (Opcode == ARM::t2HINT)
- O << ".w";
- printAnnotation(O, Annot);
- return;
-
// Check for MOVs and print canonical forms, instead.
case ARM::MOVsr: {
// FIXME: Thumb variants?
@@ -297,23 +261,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
}
break;
}
- // B9.3.3 ERET (Thumb)
- // For a target that has Virtualization Extensions, ERET is the preferred
- // disassembly of SUBS PC, LR, #0
- case ARM::t2SUBS_PC_LR: {
- if (MI->getNumOperands() == 3 && MI->getOperand(0).isImm() &&
- MI->getOperand(0).getImm() == 0 &&
- STI.getFeatureBits()[ARM::FeatureVirtualization]) {
- O << "\teret";
- printPredicateOperand(MI, 1, STI, O);
- printAnnotation(O, Annot);
- return;
- }
- break;
- }
}
- printInstruction(MI, STI, O);
+ if (!printAliasInstr(MI, STI, O))
+ printInstruction(MI, STI, O);
+
printAnnotation(O, Annot);
}
@@ -645,6 +597,34 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
O << "]" << markup(">");
}
+template <bool AlwaysPrintImm0>
+void ARMInstPrinter::printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, STI, O);
+ return;
+ }
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ unsigned ImmOffs = ARM_AM::getAM5FP16Offset(MO2.getImm());
+ unsigned Op = ARM_AM::getAM5FP16Op(MO2.getImm());
+ if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
+ O << ", "
+ << markup("<imm:")
+ << "#"
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM5FP16Op(MO2.getImm()))
+ << ImmOffs * 2
+ << markup(">");
+ }
+ O << "]" << markup(">");
+}
+
void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -901,6 +881,42 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
case 20:
O << "control";
return;
+ case 10:
+ O << "msplim";
+ return;
+ case 11:
+ O << "psplim";
+ return;
+ case 0x88:
+ O << "msp_ns";
+ return;
+ case 0x89:
+ O << "psp_ns";
+ return;
+ case 0x8a:
+ O << "msplim_ns";
+ return;
+ case 0x8b:
+ O << "psplim_ns";
+ return;
+ case 0x90:
+ O << "primask_ns";
+ return;
+ case 0x91:
+ O << "basepri_ns";
+ return;
+ case 0x92:
+ O << "basepri_max_ns";
+ return;
+ case 0x93:
+ O << "faultmask_ns";
+ return;
+ case 0x94:
+ O << "control_ns";
+ return;
+ case 0x98:
+ O << "sp_ns";
+ return;
}
}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 52f7115f0558..9d80eed84dc2 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -30,6 +30,12 @@ public:
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O);
+ virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -71,6 +77,9 @@ public:
template <bool AlwaysPrintImm0>
void printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
+ void printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/InstPrinter/Makefile b/lib/Target/ARM/InstPrinter/Makefile
deleted file mode 100644
index 65d372e44b88..000000000000
--- a/lib/Target/ARM/InstPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARMAsmPrinter
-
-# Hack: we need to include 'main' arm target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index b03cada9a641..3959eab966a8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -486,7 +486,7 @@ namespace ARM_AM {
// addrmode5 := reg +/- imm8*4
//
// The first operand is always a Reg. The second operand encodes the
- // operation in bit 8 and the immediate in bits 0-7.
+ // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
/// getAM5Opc - This function encodes the addrmode5 opc field.
static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
@@ -501,6 +501,29 @@ namespace ARM_AM {
}
//===--------------------------------------------------------------------===//
+ // Addressing Mode #5 FP16
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for coprocessor instructions, such as 16-bit FP load/stores.
+ //
+ // addrmode5fp16 := reg +/- imm8*2
+ //
+ // The first operand is always a Reg. The second operand encodes the
+ // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
+
+ /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
+ static inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM5FP16Offset(unsigned AM5Opc) {
+ return AM5Opc & 0xFF;
+ }
+ static inline AddrOpc getAM5FP16Op(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1) ? sub : add;
+ }
+
+ //===--------------------------------------------------------------------===//
// Addressing Mode #6
//===--------------------------------------------------------------------===//
//
@@ -650,6 +673,32 @@ namespace ARM_AM {
return FPUnion.F;
}
+ /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
+ /// floating-point value. If the value cannot be represented as an 8-bit
+ /// floating-point value, then return -1.
+ static inline int getFP16Imm(const APInt &Imm) {
+ uint32_t Sign = Imm.lshr(15).getZExtValue() & 1;
+ int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15
+ int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0x3f)
+ return -1;
+ Mantissa >>= 6;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+ }
+
+ static inline int getFP16Imm(const APFloat &FPImm) {
+ return getFP16Imm(FPImm.bitcastToAPInt());
+ }
+
/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
/// floating-point value. If the value cannot be represented as an 8-bit
/// floating-point value, then return -1.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index fa52c9354c17..0fc758201d47 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -46,6 +46,7 @@ public:
: MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM,
/*HasRelocationAddend*/ false) {}
};
+} // end anonymous namespace
const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = {
@@ -62,6 +63,10 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_t2_pcrel_10", 0, 32,
MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_pcrel_9", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_pcrel_9", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{"fixup_thumb_adr_pcrel_10", 0, 8,
MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
@@ -78,7 +83,9 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
- {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_blx", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_arm_thumb_cp", 0, 8,
MCFixupKindInfo::FKF_IsPCRel |
@@ -90,6 +97,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_arm_movw_lo16", 0, 20, 0},
{"fixup_t2_movt_hi16", 0, 20, 0},
{"fixup_t2_movw_lo16", 0, 20, 0},
+ {"fixup_arm_mod_imm", 0, 12, 0},
};
const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = {
// This table *must* be in the order that the fixup_* kinds are defined in
@@ -105,6 +113,10 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_t2_pcrel_10", 0, 32,
MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_pcrel_9", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_pcrel_9", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{"fixup_thumb_adr_pcrel_10", 8, 8,
MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
@@ -121,7 +133,9 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
- {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_blx", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
{"fixup_arm_thumb_cp", 8, 8,
MCFixupKindInfo::FKF_IsPCRel |
@@ -133,6 +147,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_arm_movw_lo16", 12, 20, 0},
{"fixup_t2_movt_hi16", 12, 20, 0},
{"fixup_t2_movw_lo16", 12, 20, 0},
+ {"fixup_arm_mod_imm", 20, 12, 0},
};
if (Kind < FirstTargetFixupKind)
@@ -155,10 +170,10 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) {
break;
}
}
-} // end anonymous namespace
unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const {
bool HasThumb2 = STI->getFeatureBits()[ARM::FeatureThumb2];
+ bool HasV8MBaselineOps = STI->getFeatureBits()[ARM::HasV8MBaselineOps];
switch (Op) {
default:
@@ -170,7 +185,7 @@ unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const {
case ARM::tADR:
return HasThumb2 ? (unsigned)ARM::t2ADR : Op;
case ARM::tB:
- return HasThumb2 ? (unsigned)ARM::t2B : Op;
+ return HasV8MBaselineOps ? (unsigned)ARM::t2B : Op;
case ARM::tCBZ:
return ARM::tHINT;
case ARM::tCBNZ:
@@ -243,7 +258,9 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
return reasonForFixupRelaxation(Fixup, Value);
}
-void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+void ARMAsmBackend::relaxInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI,
+ MCInst &Res) const {
unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
// Sanity check w/ diagnostic if we get here w/ a bogus instruction.
@@ -449,7 +466,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Offset by 8 just as above.
if (const MCSymbolRefExpr *SRE =
dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
- if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL)
+ if (SRE->getKind() == MCSymbolRefExpr::VK_TLSCALL)
return 0;
return 0xffffff & ((Value - 8) >> 2);
case ARM::fixup_t2_uncondbranch: {
@@ -524,10 +541,15 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
//
// Note that the halfwords are stored high first, low second; so we need
// to transpose the fixup value here to map properly.
- uint32_t offset = (Value - 2) >> 2;
+ if (Ctx && Value % 4 != 0) {
+ Ctx->reportError(Fixup.getLoc(), "misaligned ARM call destination");
+ return 0;
+ }
+
+ uint32_t offset = (Value - 4) >> 2;
if (const MCSymbolRefExpr *SRE =
dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
- if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL)
+ if (SRE->getKind() == MCSymbolRefExpr::VK_TLSCALL)
offset = 0;
uint32_t signBit = (offset & 0x400000) >> 22;
uint32_t I1Bit = (offset & 0x200000) >> 21;
@@ -563,7 +585,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
case ARM::fixup_arm_thumb_br:
// Offset by 4 and don't encode the lower bit, which is always 0.
- if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] &&
+ !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
@@ -624,6 +647,44 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return Value;
}
+ case ARM::fixup_arm_pcrel_9:
+ Value = Value - 4; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ // Fall through.
+ case ARM::fixup_t2_pcrel_9: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value = Value - 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // These values don't encode the low bit since it's always zero.
+ if (Ctx && (Value & 1)) {
+ Ctx->reportError(Fixup.getLoc(), "invalid value for this fixup");
+ return 0;
+ }
+ Value >>= 1;
+ if (Ctx && Value >= 256) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_9, but with 16-bit halfwords
+ // swapped.
+ if (Kind == ARM::fixup_t2_pcrel_9)
+ return swapHalfWords(Value, IsLittleEndian);
+
+ return Value;
+ }
+ case ARM::fixup_arm_mod_imm:
+ Value = ARM_AM::getSOImmVal(Value);
+ if (Ctx && Value >> 12) {
+ Ctx->reportError(Fixup.getLoc(), "out of range immediate fixup value");
+ return 0;
+ }
+ return Value;
}
}
@@ -690,11 +751,13 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case FK_Data_2:
case ARM::fixup_arm_thumb_br:
case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_mod_imm:
return 2;
case ARM::fixup_arm_pcrel_10_unscaled:
case ARM::fixup_arm_ldst_pcrel_12:
case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_pcrel_9:
case ARM::fixup_arm_adr_pcrel_12:
case ARM::fixup_arm_uncondbl:
case ARM::fixup_arm_condbl:
@@ -708,6 +771,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case ARM::fixup_t2_condbranch:
case ARM::fixup_t2_uncondbranch:
case ARM::fixup_t2_pcrel_10:
+ case ARM::fixup_t2_pcrel_9:
case ARM::fixup_t2_adr_pcrel_12:
case ARM::fixup_arm_thumb_bl:
case ARM::fixup_arm_thumb_blx:
@@ -766,6 +830,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
case ARM::fixup_arm_movw_lo16:
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_arm_mod_imm:
// Instruction size is 4 bytes.
return 4;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 28a62132a419..84caaacc47d3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -11,12 +11,12 @@
#define LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H
#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-namespace {
+namespace llvm {
class ARMAsmBackend : public MCAsmBackend {
const MCSubtargetInfo *STI;
@@ -63,7 +63,8 @@ public:
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override;
- void relaxInstruction(const MCInst &Inst, MCInst &Res) const override;
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override;
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
@@ -74,6 +75,6 @@ public:
void setIsThumb(bool it) { isThumbMode = it; }
bool isLittle() const { return IsLittleEndian; }
};
-} // end anonymous namespace
+} // end namespace llvm
#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index 995dd0fe08ee..09dc0173ade6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -10,11 +10,10 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H
#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H
+#include "ARMAsmBackend.h"
#include "llvm/Support/MachO.h"
-using namespace llvm;
-
-namespace {
+namespace llvm {
class ARMAsmBackendDarwin : public ARMAsmBackend {
const MCRegisterInfo &MRI;
public:
@@ -22,7 +21,6 @@ public:
ARMAsmBackendDarwin(const Target &T, const Triple &TT,
const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st)
: ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) {
- HasDataInCodeSupport = true;
}
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
@@ -33,6 +31,6 @@ public:
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override;
};
-}
+} // end namespace llvm
#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 68b12edd089e..748f915be17b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -10,7 +10,10 @@
#ifndef LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H
#define LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H
+#include "ARMAsmBackend.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
using namespace llvm;
+
namespace {
class ARMAsmBackendELF : public ARMAsmBackend {
public:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
index 170f59a4c905..2a375be49a83 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -10,6 +10,7 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H
#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H
+#include "ARMAsmBackend.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 4289a73e9d6b..088b4205ed62 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -289,18 +289,20 @@ namespace ARMII {
/// higher 16 bit of the address. Used only via movt instruction.
MO_HI16 = 0x2,
- /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a
- /// call operand.
- MO_PLT = 0x3,
-
/// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects
/// just that part of the flag set.
- MO_OPTION_MASK = 0x3f,
+ MO_OPTION_MASK = 0x1f,
/// MO_DLLIMPORT - On a symbol operand, this represents that the reference
/// to the symbol is for an import stub. This is used for DLL import
/// storage class indication on Windows.
- MO_DLLIMPORT = 0x40,
+ MO_DLLIMPORT = 0x20,
+
+ /// MO_SECREL - On a symbol operand this indicates that the immediate is
+ /// the offset from beginning of section.
+ ///
+ /// This is the TLS offset for the COFF/Windows TLS mechanism.
+ MO_SECREL = 0x40,
/// MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it
/// represents a symbol which, if indirect, will get special Darwin mangling
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 52eba8be288f..4118fe8e8cdb 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -34,8 +34,8 @@ namespace {
~ARMELFObjectWriter() override;
- unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel) const override;
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
@@ -67,7 +67,7 @@ bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
// Need to examine the Fixup when determining whether to
// emit the relocation as an explicit symbol or as a section relative
// offset
-unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
+unsigned ARMELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
return GetRelocTypeInner(Target, Fixup, IsPCRel);
@@ -98,6 +98,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_ARM_GOT_PREL:
Type = ELF::R_ARM_GOT_PREL;
break;
+ case MCSymbolRefExpr::VK_ARM_PREL31:
+ Type = ELF::R_ARM_PREL31;
+ break;
}
break;
case ARM::fixup_arm_blx:
@@ -106,7 +109,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PLT:
Type = ELF::R_ARM_CALL;
break;
- case MCSymbolRefExpr::VK_ARM_TLSCALL:
+ case MCSymbolRefExpr::VK_TLSCALL:
Type = ELF::R_ARM_TLS_CALL;
break;
default:
@@ -120,6 +123,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
Type = ELF::R_ARM_JUMP24;
break;
case ARM::fixup_t2_condbranch:
+ Type = ELF::R_ARM_THM_JUMP19;
+ break;
case ARM::fixup_t2_uncondbranch:
Type = ELF::R_ARM_THM_JUMP24;
break;
@@ -138,7 +143,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_arm_thumb_bl:
case ARM::fixup_arm_thumb_blx:
switch (Modifier) {
- case MCSymbolRefExpr::VK_ARM_TLSCALL:
+ case MCSymbolRefExpr::VK_TLSCALL:
Type = ELF::R_ARM_THM_TLS_CALL;
break;
default:
@@ -210,10 +215,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_ARM_TLSLDO:
Type = ELF::R_ARM_TLS_LDO32;
break;
- case MCSymbolRefExpr::VK_ARM_TLSCALL:
+ case MCSymbolRefExpr::VK_TLSCALL:
Type = ELF::R_ARM_TLS_CALL;
break;
- case MCSymbolRefExpr::VK_ARM_TLSDESC:
+ case MCSymbolRefExpr::VK_TLSDESC:
Type = ELF::R_ARM_TLS_GOTDESC;
break;
case MCSymbolRefExpr::VK_ARM_TLSDESCSEQ:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 57577dc834b7..36cb74765f3b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -763,6 +763,12 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
setAttributeItem(Virtualization_use, AllowTZVirtualization, false);
break;
+ case ARM::AK_ARMV8MBaseline:
+ case ARM::AK_ARMV8MMainline:
+ setAttributeItem(THUMB_ISA_use, AllowThumbDerived, false);
+ setAttributeItem(CPU_arch_profile, MicroControllerProfile, false);
+ break;
+
case ARM::AK_IWMMXT:
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, Allowed, false);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 46ba57170db5..3fe2302bdd37 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -33,6 +33,13 @@ enum Fixups {
// fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for
// the short-swapped encoding of Thumb2 instructions.
fixup_t2_pcrel_10,
+ // fixup_arm_pcrel_9 - 9-bit PC relative relocation for symbol addresses
+ // used in VFP instructions where bit 0 is not encoded (so it's encoded as an
+ // 8-bit immediate).
+ fixup_arm_pcrel_9,
+ // fixup_t2_pcrel_9 - Equivalent to fixup_arm_pcrel_9, accounting for
+ // the short-swapped encoding of Thumb2 instructions.
+ fixup_t2_pcrel_9,
// fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol
// addresses where the lower 2 bits are not encoded (so it's encoded as an
// 8-bit immediate).
@@ -100,6 +107,9 @@ enum Fixups {
fixup_t2_movt_hi16, // :upper16:
fixup_t2_movw_lo16, // :lower16:
+ // fixup_arm_mod_imm - Fixup for mod_imm
+ fixup_arm_mod_imm,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index bda37f6616a8..53cd29a6061e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -13,7 +13,6 @@
#include "ARMMCAsmInfo.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -33,7 +32,7 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) {
SupportsDebugInformation = true;
// Exceptions handling
- ExceptionsType = TheTriple.isOSDarwin() && !TheTriple.isWatchOS()
+ ExceptionsType = (TheTriple.isOSDarwin() && !TheTriple.isWatchABI())
? ExceptionHandling::SjLj
: ExceptionHandling::DwarfCFI;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index b88578309f08..9fca13eeea93 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -120,11 +120,11 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+ /// getThumbBranchTargetOpValue - Return encoding info for 24-bit
/// immediate Thumb2 direct branch target.
- uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ uint32_t getThumbBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
/// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate
/// branch target.
@@ -214,11 +214,6 @@ public:
llvm_unreachable("Invalid ShiftOpc!");
}
- /// getAddrMode2OpValue - Return encoding for addrmode2 operands.
- uint32_t getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
/// getAddrMode2OffsetOpValue - Return encoding for am2offset operands.
uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
@@ -255,11 +250,16 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand.
+ /// getAddrMode5OpValue - Return encoding info for 'reg +/- (imm8 << 2)' operand.
uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ /// getAddrMode5FP16OpValue - Return encoding info for 'reg +/- (imm8 << 1)' operand.
+ uint32_t getAddrMode5FP16OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
/// getCCOutOpValue - Return encoding of the 's' bit.
unsigned getCCOutOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
@@ -312,12 +312,8 @@ public:
// Support for fixups (MCFixup)
if (MO.isExpr()) {
const MCExpr *Expr = MO.getExpr();
- // In instruction code this value always encoded as lowest 12 bits,
- // so we don't have to perform any specific adjustments.
- // Due to requirements of relocatable records we have to use FK_Data_4.
- // See ARMELFObjectWriter::ExplicitRelSym and
- // ARMELFObjectWriter::GetRelocTypeInner for more details.
- MCFixupKind Kind = MCFixupKind(FK_Data_4);
+ // Fixups resolve to plain values that need to be encoded.
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_mod_imm);
Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
return 0;
}
@@ -345,9 +341,6 @@ public:
unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- unsigned getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
/// getSORegOpValue - Return an encoded so_reg shifted register value.
unsigned getSORegRegOpValue(const MCInst &MI, unsigned Op,
@@ -757,10 +750,9 @@ getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
/// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
/// immediate branch target.
-uint32_t ARMMCCodeEmitter::
-getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+uint32_t ARMMCCodeEmitter::getThumbBranchTargetOpValue(
+ const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
unsigned Val = 0;
const MCOperand MO = MI.getOperand(OpIdx);
@@ -1049,12 +1041,12 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
switch (ARM16Expr->getKind()) {
default: llvm_unreachable("Unsupported ARMFixup");
case ARMMCExpr::VK_ARM_HI16:
- Kind = MCFixupKind(isThumb2(STI) ? ARM::fixup_t2_movt_hi16
- : ARM::fixup_arm_movt_hi16);
+ Kind = MCFixupKind(isThumb(STI) ? ARM::fixup_t2_movt_hi16
+ : ARM::fixup_arm_movt_hi16);
break;
case ARMMCExpr::VK_ARM_LO16:
- Kind = MCFixupKind(isThumb2(STI) ? ARM::fixup_t2_movw_lo16
- : ARM::fixup_arm_movw_lo16);
+ Kind = MCFixupKind(isThumb(STI) ? ARM::fixup_t2_movw_lo16
+ : ARM::fixup_arm_movw_lo16);
break;
}
@@ -1105,21 +1097,6 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
}
uint32_t ARMMCCodeEmitter::
-getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- // {17-14} Rn
- // {13} 1 == imm12, 0 == Rm
- // {12} isAdd
- // {11-0} imm12/Rm
- const MCOperand &MO = MI.getOperand(OpIdx);
- unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
- uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups, STI);
- Binary |= Rn << 14;
- return Binary;
-}
-
-uint32_t ARMMCCodeEmitter::
getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -1252,7 +1229,7 @@ getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
return (MO.getImm() >> 2);
}
-/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand.
+/// getAddrMode5OpValue - Return encoding info for 'reg +/- (imm8 << 2)' operand.
uint32_t ARMMCCodeEmitter::
getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
@@ -1292,6 +1269,46 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
return Binary;
}
+/// getAddrMode5FP16OpValue - Return encoding info for 'reg +/- (imm8 << 1)' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrMode5FP16OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd;
+ // If The first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false; // 'U' bit is handled as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind;
+ if (isThumb2(STI))
+ Kind = MCFixupKind(ARM::fixup_t2_pcrel_9);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_pcrel_9);
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+
+ ++MCNumCPRelocations;
+ } else {
+ EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups, STI);
+ isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add;
+ }
+
+ uint32_t Binary = ARM_AM::getAM5Offset(Imm8);
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
unsigned ARMMCCodeEmitter::
getSORegRegOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
@@ -1446,23 +1463,6 @@ getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
}
unsigned ARMMCCodeEmitter::
-getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO1 = MI.getOperand(OpNum);
-
- // FIXME: Needs fixup support.
- unsigned Value = 0;
- int32_t tmp = (int32_t)MO1.getImm();
- if (tmp < 0)
- tmp = abs(tmp);
- else
- Value |= 4096; // Set the ADD bit
- Value |= tmp & 4095;
- return Value;
-}
-
-unsigned ARMMCCodeEmitter::
getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 8c8c249addb5..afb089ab0286 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -16,7 +16,6 @@
#include "ARMMCTargetDesc.h"
#include "InstPrinter/ARMInstPrinter.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -201,18 +200,6 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCCodeGenInfo *createARMMCCodeGenInfo(const Triple &TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- if (RM == Reloc::Default) {
- // Default relocation model on Darwin is PIC, not DynamicNoPIC.
- RM = TT.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
- }
- X->initMCCodeGenInfo(RM, CM, OL);
- return X;
-}
-
static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
MCAsmBackend &MAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll) {
@@ -291,9 +278,6 @@ extern "C" void LLVMInitializeARMTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(*T, createARMMCAsmInfo);
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(*T, createARMMCCodeGenInfo);
-
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(*T, createARMMCInstrInfo);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
index 4468132588cf..482bcf902518 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -12,7 +12,7 @@
#include "llvm-c/Disassembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCRelocationInfo.h"
+#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
using namespace llvm;
using namespace object;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index cfd504e533af..cfa6ce7da65e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -389,7 +389,8 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
uint32_t Offset = Target.getConstant();
if (IsPCRel && RelocType == MachO::ARM_RELOC_VANILLA)
Offset += 1 << Log2Size;
- if (Offset && A && !Writer->doesSymbolRequireExternRelocation(*A))
+ if (Offset && A && !Writer->doesSymbolRequireExternRelocation(*A) &&
+ RelocType != MachO::ARM_RELOC_HALF)
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
Target, RelocType, Log2Size,
FixedValue);
@@ -447,8 +448,10 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.
if (Type == MachO::ARM_RELOC_HALF) {
- // The other-half value only gets populated for the movt and movw
- // relocation entries.
+ // The entire addend is needed to correctly apply a relocation. One half is
+ // extracted from the instruction itself, the other comes from this
+ // PAIR. I.e. it's correct that we insert the high bits of the addend in the
+ // MOVW case here.
uint32_t Value = 0;
switch ((unsigned)Fixup.getKind()) {
default: break;
diff --git a/lib/Target/ARM/MCTargetDesc/Makefile b/lib/Target/ARM/MCTargetDesc/Makefile
deleted file mode 100644
index 448ed9df2bff..000000000000
--- a/lib/Target/ARM/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/ARM/TargetDesc/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARMDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index ed2deeaa24c0..7f2124033982 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -378,12 +378,14 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
}
bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(*Fn.getFunction()))
+ return false;
+
TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
TRI = Fn.getSubtarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>();
- // Only run this for CortexA9.
- if (!STI->isCortexA9())
+ if (!STI->expandMLx())
return false;
isLikeA9 = STI->isLikeA9() || STI->isSwift();
isSwift = STI->isSwift();
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
deleted file mode 100644
index c1601a3f29dd..000000000000
--- a/lib/Target/ARM/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
-##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMARMCodeGen
-TARGET = ARM
-
-# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \
- ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
- ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \
- ARMGenCallingConv.inc \
- ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
- ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc
-
-DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 5acb2d46f3e7..549af00fcc99 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -115,7 +115,7 @@ L6:
.long -858993459
.long 1074318540
-3) struct copies appear to be done field by field
+3) struct copies appear to be done field by field
instead of by words, at least sometimes:
struct foo { int x; short s; char c1; char c2; };
@@ -142,7 +142,7 @@ a good way to measure on ARM).
* Consider this silly example:
-double bar(double x) {
+double bar(double x) {
double r = foo(3.1);
return x+r;
}
@@ -162,7 +162,7 @@ _bar:
fmrrd r0, r1, d0
ldmfd sp!, {r4, r5, r7, pc}
-Ignore the prologue and epilogue stuff for a second. Note
+Ignore the prologue and epilogue stuff for a second. Note
mov r4, r0
mov r5, r1
the copys to callee-save registers and the fact they are only being used by the
@@ -269,7 +269,7 @@ LBB4:
b LBB2
If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can
-then eliminate beq and and turn the unconditional branch to LBB2 to a bne.
+then eliminate beq and turn the unconditional branch to LBB2 to a bne.
See McCat/18-imp/ComputeBoundingBoxes for an example.
@@ -391,10 +391,10 @@ void foo(signed char* p) {
}
llvm decides it's a good idea to turn the repeated if...else into a
-binary tree, as if it were a switch; the resulting code requires -1
+binary tree, as if it were a switch; the resulting code requires -1
compare-and-branches when *p<=2 or *p==5, the same number if *p==4
or *p>6, and +1 if *p==3. So it should be a speed win
-(on balance). However, the revised code is larger, with 4 conditional
+(on balance). However, the revised code is larger, with 4 conditional
branches instead of 3.
More seriously, there is a byte->word extend before
@@ -421,8 +421,8 @@ int foo(int a, int b, int c, int d) {
return (int)(acc >> 32);
}
-Should compile to use SMLAL (Signed Multiply Accumulate Long) which multiplies
-two signed 32-bit values to produce a 64-bit value, and accumulates this with
+Should compile to use SMLAL (Signed Multiply Accumulate Long) which multiplies
+two signed 32-bit values to produce a 64-bit value, and accumulates this with
a 64-bit value.
We currently get this with both v4 and v6:
@@ -513,7 +513,7 @@ Be careful though as the last attempt caused infinite looping on lencod.
//===---------------------------------------------------------------------===//
-Predication issue. This function:
+Predication issue. This function:
extern unsigned array[ 128 ];
int foo( int x ) {
diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index df73554372d8..3f88eb818062 100644
--- a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -8,7 +8,6 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/ARM/TargetInfo/Makefile b/lib/Target/ARM/TargetInfo/Makefile
deleted file mode 100644
index 6292ab14b346..000000000000
--- a/lib/Target/ARM/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/ARM/TargetInfo/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARMInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 93e0ac4aa320..c0732e4b750a 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -38,18 +38,17 @@ bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
return !MF.getFrameInfo()->hasVarSizedObjects();
}
-static void
-emitSPUpdate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const TargetInstrInfo &TII, DebugLoc dl,
- const ThumbRegisterInfo &MRI,
- int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+static void emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, const DebugLoc &dl,
+ const ThumbRegisterInfo &MRI, int NumBytes,
+ unsigned MIFlags = MachineInstr::NoFlags) {
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
MRI, MIFlags);
}
-void Thumb1FrameLowering::
+MachineBasicBlock::iterator Thumb1FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const Thumb1InstrInfo &TII =
@@ -60,9 +59,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
// ADJCALLSTACKUP -> add, sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc dl = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
+ MachineInstr &Old = *I;
+ DebugLoc dl = Old.getDebugLoc();
+ unsigned Amount = Old.getOperand(0).getImm();
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
@@ -71,7 +70,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
Amount = (Amount+Align-1)/Align*Align;
// Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
+ unsigned Opc = Old.getOpcode();
if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
} else {
@@ -80,7 +79,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
}
}
}
- MBB.erase(I);
+ return MBB.erase(I);
}
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
@@ -151,7 +150,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R9:
case ARM::R10:
case ARM::R11:
- if (STI.isTargetMachO()) {
+ if (STI.splitFramePushPop()) {
GPRCS2Size += 4;
break;
}
@@ -189,7 +188,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
int FramePtrOffsetInBlock = 0;
unsigned adjustedGPRCS1Size = GPRCS1Size;
- if (tryFoldSPUpdateIntoPushPop(STI, MF, std::prev(MBBI), NumBytes)) {
+ if (tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
FramePtrOffsetInBlock = NumBytes;
adjustedGPRCS1Size += NumBytes;
NumBytes = 0;
@@ -213,7 +212,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.isTargetMachO())
+ if (STI.splitFramePushPop())
break;
// fallthough
case ARM::R0:
@@ -304,16 +303,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
AFI->setShouldRestoreSPFromFP(true);
}
-static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
- if (MI->getOpcode() == ARM::tLDRspi &&
- MI->getOperand(1).isFI() &&
- isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
+static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
+ if (MI.getOpcode() == ARM::tLDRspi && MI.getOperand(1).isFI() &&
+ isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs))
return true;
- else if (MI->getOpcode() == ARM::tPOP) {
+ else if (MI.getOpcode() == ARM::tPOP) {
// The first two operands are predicates. The last two are
// imp-def and imp-use of SP. Check everything in between.
- for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i)
- if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ for (int i = 2, e = MI.getNumOperands() - 2; i != e; ++i)
+ if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
return false;
return true;
}
@@ -346,8 +344,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
if (MBBI != MBB.begin()) {
do
--MBBI;
- while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
- if (!isCSRestore(MBBI, CSRegs))
+ while (MBBI != MBB.begin() && isCSRestore(*MBBI, CSRegs));
+ if (!isCSRestore(*MBBI, CSRegs))
++MBBI;
}
@@ -376,11 +374,11 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(FramePtr));
} else {
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
- &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
+ &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
- if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes))
+ if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes))
emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
- } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes))
+ } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
}
}
@@ -467,7 +465,7 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
// Look for a temporary register to use.
// First, compute the liveness information.
LivePhysRegs UsedRegs(STI.getRegisterInfo());
- UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true);
+ UsedRegs.addLiveOuts(MBB);
// The semantic of pristines changed recently and now,
// the callee-saved registers that are touched in the function
// are not part of the pristines set anymore.
@@ -637,7 +635,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
if (MI != MBB.end())
- MIB.copyImplicitOps(&*MI);
+ MIB.copyImplicitOps(*MI);
MI = MBB.erase(MI);
} else
// LR may only be popped into PC, as part of return sequence.
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index 27faac63683a..9de1ba1d7009 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -41,7 +41,7 @@ public:
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- void
+ MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 530e1d33839a..159731d8fc72 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -38,9 +38,9 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
}
void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) const {
// Need to check the arch.
MachineFunction &MF = *MBB.getParent();
const ARMSubtarget &st = MF.getSubtarget<ARMSubtarget>();
@@ -118,11 +118,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
}
}
-void
-Thumb1InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
- Reloc::Model RM) const {
- if (RM == Reloc::PIC_)
- expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi, RM);
+void Thumb1InstrInfo::expandLoadStackGuard(
+ MachineBasicBlock::iterator MI) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetMachine &TM = MF.getTarget();
+ if (TM.isPositionIndependent())
+ expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi);
else
- expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi, RM);
+ expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi);
}
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index f3f493d89237..931914ad2799 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -38,9 +38,8 @@ public:
///
const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
- void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -55,8 +54,7 @@ public:
const TargetRegisterInfo *TRI) const override;
private:
- void expandLoadStackGuard(MachineBasicBlock::iterator MI,
- Reloc::Model RM) const override;
+ void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
};
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index bf0498dfda69..0c7055551632 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -36,6 +36,11 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
const char *getPassName() const override {
return "Thumb IT blocks insertion pass";
}
@@ -165,7 +170,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
++I;
if (I != E) {
unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = getITInstrPredicate(I, NPredReg);
+ ARMCC::CondCodes NCC = getITInstrPredicate(*I, NPredReg);
if (NCC == CC || NCC == OCC)
return true;
}
@@ -182,7 +187,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
MachineInstr *MI = &*MBBI;
DebugLoc dl = MI->getDebugLoc();
unsigned PredReg = 0;
- ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
if (CC == ARMCC::AL) {
++MBBI;
continue;
@@ -222,7 +227,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
MI = NMI;
unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
+ ARMCC::CondCodes NCC = getITInstrPredicate(*NMI, NPredReg);
if (NCC == CC || NCC == OCC) {
Mask |= (NCC & 1) << Pos;
// Add implicit use of ITSTATE.
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 4da769f23280..e2e6dafd218a 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -50,7 +50,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
MachineBasicBlock *NewDest) const {
MachineBasicBlock *MBB = Tail->getParent();
ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
- if (!AFI->hasITBlocks()) {
+ if (!AFI->hasITBlocks() || Tail->isBranch()) {
TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
return;
}
@@ -58,7 +58,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// If the first instruction of Tail is predicated, we may have to update
// the IT instruction.
unsigned PredReg = 0;
- ARMCC::CondCodes CC = getInstrPredicate(Tail, PredReg);
+ ARMCC::CondCodes CC = getInstrPredicate(*Tail, PredReg);
MachineBasicBlock::iterator MBBI = Tail;
if (CC != ARMCC::AL)
// Expecting at least the t2IT instruction before it.
@@ -106,13 +106,13 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
}
unsigned PredReg = 0;
- return getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+ return getITInstrPredicate(*MBBI, PredReg) == ARMCC::AL;
}
void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) const {
// Handle SPR, DPR, and QPR copies.
if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
@@ -148,8 +148,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for
// gsub_0, but needs an extra constraint for gsub_1 (which could be sp
// otherwise).
- MachineRegisterInfo *MRI = &MF.getRegInfo();
- MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+ }
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
@@ -187,8 +189,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
// gsub_0, but needs an extra constraint for gsub_1 (which could be sp
// otherwise).
- MachineRegisterInfo *MRI = &MF.getRegInfo();
- MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MRI->constrainRegClass(DestReg,
+ &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+ }
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
@@ -204,20 +209,22 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
-void
-Thumb2InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
- Reloc::Model RM) const {
- if (RM == Reloc::PIC_)
- expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12, RM);
+void Thumb2InstrInfo::expandLoadStackGuard(
+ MachineBasicBlock::iterator MI) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ if (MF.getTarget().isPositionIndependent())
+ expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12);
else
- expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12, RM);
+ expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12);
}
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI, DebugLoc dl,
- unsigned DestReg, unsigned BaseReg, int NumBytes,
- ARMCC::CondCodes Pred, unsigned PredReg,
- const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ MachineBasicBlock::iterator &MBBI,
+ const DebugLoc &dl, unsigned DestReg,
+ unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const ARMBaseInstrInfo &TII,
+ unsigned MIFlags) {
if (NumBytes == 0 && DestReg != BaseReg) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
.addReg(BaseReg, RegState::Kill)
@@ -459,7 +466,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset += MI.getOperand(FrameRegIdx+1).getImm();
unsigned PredReg;
- if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
+ if (Offset == 0 && getInstrPredicate(MI, PredReg) == ARMCC::AL) {
// Turn it into a move.
MI.setDesc(TII.get(ARM::tMOVr));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
@@ -627,9 +634,9 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return Offset == 0;
}
-ARMCC::CondCodes
-llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
- unsigned Opc = MI->getOpcode();
+ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI,
+ unsigned &PredReg) {
+ unsigned Opc = MI.getOpcode();
if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
return ARMCC::AL;
return getInstrPredicate(MI, PredReg);
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 916ab06ec305..15d63300b6a2 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -39,9 +39,8 @@ public:
bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const override;
- void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -63,16 +62,13 @@ public:
const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
private:
- void expandLoadStackGuard(MachineBasicBlock::iterator MI,
- Reloc::Model RM) const override;
+ void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
};
/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
/// to llvm::getInstrPredicate except it returns AL for conditional branch
/// instructions which are "predicated", but are not in IT blocks.
-ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
-
-
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
}
#endif
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index bcd0e5751258..c4fdb9b3147d 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -18,11 +18,12 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h" // To access Function attributes
+#include "llvm/IR/Function.h" // To access Function attributes
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "t2-reduce-size"
@@ -115,12 +116,14 @@ namespace {
{ ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
{ ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
{ ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
{ ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
{ ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
{ ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
{ ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
{ ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
{ ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STR_POST,ARM::tSTMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
{ ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
@@ -143,6 +146,11 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
const char *getPassName() const override {
return "Thumb2 instruction size reduction pass";
}
@@ -208,7 +216,7 @@ namespace {
}
Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
- : MachineFunctionPass(ID), PredicateFtor(Ftor) {
+ : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
OptimizeSize = MinimizeSize = false;
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
@@ -417,6 +425,46 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
HasShift = true;
OpNum = 4;
break;
+ case ARM::t2LDR_POST:
+ case ARM::t2STR_POST: {
+ if (!MBB.getParent()->getFunction()->optForMinSize())
+ return false;
+
+ // We're creating a completely different type of load/store - LDM from LDR.
+ // For this reason we can't reuse the logic at the end of this function; we
+ // have to implement the MI building here.
+ bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
+ unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
+ unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
+ unsigned Offset = MI->getOperand(3).getImm();
+ unsigned PredImm = MI->getOperand(4).getImm();
+ unsigned PredReg = MI->getOperand(5).getReg();
+ assert(isARMLowRegister(Rt));
+ assert(isARMLowRegister(Rn));
+
+ if (Offset != 4)
+ return false;
+
+ // Add the 16-bit load / store instruction.
+ DebugLoc dl = MI->getDebugLoc();
+ auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
+ .addReg(Rn, RegState::Define)
+ .addReg(Rn)
+ .addImm(PredImm)
+ .addReg(PredReg)
+ .addReg(Rt, IsStore ? 0 : RegState::Define);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ // Kill the old instruction.
+ MI->eraseFromBundle();
+ ++NumLdSts;
+ return true;
+ }
case ARM::t2LDMIA: {
unsigned BaseReg = MI->getOperand(0).getReg();
assert(isARMLowRegister(BaseReg));
@@ -597,7 +645,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2ADDSri:
case ARM::t2ADDSrr: {
unsigned PredReg = 0;
- if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
+ if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
switch (Opc) {
default: break;
case ARM::t2ADDSri: {
@@ -670,7 +718,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
if (Reg1 != Reg0)
return false;
// Try to commute the operands to make it a 2-address instruction.
- MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
if (!CommutedMI)
return false;
}
@@ -678,11 +726,11 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Try to commute the operands to make it a 2-address instruction.
unsigned CommOpIdx1 = 1;
unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
- if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+ if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
MI->getOperand(CommOpIdx2).getReg() != Reg0)
return false;
MachineInstr *CommutedMI =
- TII->commuteInstruction(MI, false, CommOpIdx1, CommOpIdx2);
+ TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
if (!CommutedMI)
return false;
}
@@ -702,7 +750,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Check if it's possible / necessary to transfer the predicate.
const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
bool SkipPred = false;
if (Pred != ARMCC::AL) {
if (!NewMCID.isPredicable())
@@ -798,7 +846,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Check if it's possible / necessary to transfer the predicate.
const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
bool SkipPred = false;
if (Pred != ARMCC::AL) {
if (!NewMCID.isPredicable())
@@ -983,7 +1031,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
NextMII->bundleWithPred();
}
- if (!NextInSameBundle && MI->isInsideBundle()) {
+ if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
// FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
// marker is only on the BUNDLE instruction. Process the BUNDLE
// instruction as we finish with the bundled instruction to work around
@@ -1050,5 +1098,5 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
/// reduction pass.
FunctionPass *llvm::createThumb2SizeReductionPass(
std::function<bool(const Function &)> Ftor) {
- return new Thumb2SizeReduce(Ftor);
+ return new Thumb2SizeReduce(std::move(Ftor));
}
diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index b5f9d7e38f27..6c26c8843865 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -61,7 +61,7 @@ ThumbRegisterInfo::getPointerRegClass(const MachineFunction &MF,
static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, unsigned DestReg,
+ const DebugLoc &dl, unsigned DestReg,
unsigned SubIdx, int Val,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned MIFlags) {
@@ -81,7 +81,7 @@ static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, unsigned DestReg,
+ const DebugLoc &dl, unsigned DestReg,
unsigned SubIdx, int Val,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned MIFlags) {
@@ -101,9 +101,9 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
void ThumbRegisterInfo::emitLoadConstPool(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl,
- unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred,
- unsigned PredReg, unsigned MIFlags) const {
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ const DebugLoc &dl, unsigned DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
if (STI.isThumb1Only()) {
@@ -120,57 +120,55 @@ void ThumbRegisterInfo::emitLoadConstPool(
/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
/// in a register using mov / mvn sequences or load the immediate from a
/// constpool entry.
-static
-void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned BaseReg,
- int NumBytes, bool CanChangeCC,
- const TargetInstrInfo &TII,
- const ARMBaseRegisterInfo& MRI,
- unsigned MIFlags = MachineInstr::NoFlags) {
- MachineFunction &MF = *MBB.getParent();
- bool isHigh = !isARMLowRegister(DestReg) ||
- (BaseReg != 0 && !isARMLowRegister(BaseReg));
- bool isSub = false;
- // Subtract doesn't have high register version. Load the negative value
- // if either base or dest register is a high register. Also, if do not
- // issue sub as part of the sequence if condition register is to be
- // preserved.
- if (NumBytes < 0 && !isHigh && CanChangeCC) {
- isSub = true;
- NumBytes = -NumBytes;
- }
- unsigned LdReg = DestReg;
- if (DestReg == ARM::SP)
- assert(BaseReg == ARM::SP && "Unexpected!");
- if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg))
- LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
-
- if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
- .addImm(NumBytes).setMIFlags(MIFlags);
- } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) {
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
- .addImm(NumBytes).setMIFlags(MIFlags);
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
- .addReg(LdReg, RegState::Kill).setMIFlags(MIFlags);
- } else
- MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes,
- ARMCC::AL, 0, MIFlags);
-
- // Emit add / sub.
- int Opc = (isSub) ? ARM::tSUBrr : ((isHigh || !CanChangeCC) ? ARM::tADDhirr
- : ARM::tADDrr);
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
- if (Opc != ARM::tADDhirr)
- MIB = AddDefaultT1CC(MIB);
- if (DestReg == ARM::SP || isSub)
- MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
- else
- MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
- AddDefaultPred(MIB);
+static void emitThumbRegPlusImmInReg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ const DebugLoc &dl, unsigned DestReg, unsigned BaseReg, int NumBytes,
+ bool CanChangeCC, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo &MRI, unsigned MIFlags = MachineInstr::NoFlags) {
+ MachineFunction &MF = *MBB.getParent();
+ bool isHigh = !isARMLowRegister(DestReg) ||
+ (BaseReg != 0 && !isARMLowRegister(BaseReg));
+ bool isSub = false;
+ // Subtract doesn't have high register version. Load the negative value
+ // if either base or dest register is a high register. Also, if do not
+ // issue sub as part of the sequence if condition register is to be
+ // preserved.
+ if (NumBytes < 0 && !isHigh && CanChangeCC) {
+ isSub = true;
+ NumBytes = -NumBytes;
+ }
+ unsigned LdReg = DestReg;
+ if (DestReg == ARM::SP)
+ assert(BaseReg == ARM::SP && "Unexpected!");
+ if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg))
+ LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
+
+ if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes)
+ .setMIFlags(MIFlags);
+ } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) {
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ .addImm(NumBytes)
+ .setMIFlags(MIFlags);
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
+ .addReg(LdReg, RegState::Kill)
+ .setMIFlags(MIFlags);
+ } else
+ MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, ARMCC::AL, 0,
+ MIFlags);
+
+ // Emit add / sub.
+ int Opc = (isSub) ? ARM::tSUBrr
+ : ((isHigh || !CanChangeCC) ? ARM::tADDhirr : ARM::tADDrr);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
+ if (Opc != ARM::tADDhirr)
+ MIB = AddDefaultT1CC(MIB);
+ if (DestReg == ARM::SP || isSub)
+ MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+ else
+ MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+ AddDefaultPred(MIB);
}
/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
@@ -179,10 +177,10 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
/// be too long. This is allowed to modify the condition flags.
void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned BaseReg,
- int NumBytes, const TargetInstrInfo &TII,
- const ARMBaseRegisterInfo& MRI,
+ const DebugLoc &dl, unsigned DestReg,
+ unsigned BaseReg, int NumBytes,
+ const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo &MRI,
unsigned MIFlags) {
bool isSub = NumBytes < 0;
unsigned Bytes = (unsigned)NumBytes;
@@ -281,7 +279,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
unsigned RequiredExtraInstrs;
if (ExtraRange)
- RequiredExtraInstrs = RoundUpToAlignment(RangeAfterCopy, ExtraRange) / ExtraRange;
+ RequiredExtraInstrs = alignTo(RangeAfterCopy, ExtraRange) / ExtraRange;
else if (RangeAfterCopy > 0)
// We need an extra instruction but none is available
RequiredExtraInstrs = 1000000;
diff --git a/lib/Target/ARM/ThumbRegisterInfo.h b/lib/Target/ARM/ThumbRegisterInfo.h
index 23aaff37f409..e6b06959e428 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.h
+++ b/lib/Target/ARM/ThumbRegisterInfo.h
@@ -39,8 +39,9 @@ public:
/// specified immediate.
void
emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0,
+ const DebugLoc &dl, unsigned DestReg, unsigned SubIdx,
+ int Val, ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0,
unsigned MIFlags = MachineInstr::NoFlags) const override;
// rewrite MI to access 'Offset' bytes from the FP. Update Offset to be