aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt3
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp27
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h2
-rw-r--r--lib/Target/PowerPC/PPC.h19
-rw-r--r--lib/Target/PowerPC/PPC.td43
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp10
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp36
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp724
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp40
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp61
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp228
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h14
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td190
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td10
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp137
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h3
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td222
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp2
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp20
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp35
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h7
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td14
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td39
-rw-r--r--lib/Target/PowerPC/PPCSchedule440.td29
-rw-r--r--lib/Target/PowerPC/PPCScheduleA2.td66
-rw-r--r--lib/Target/PowerPC/PPCScheduleG3.td1
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4.td1
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4Plus.td1
-rw-r--r--lib/Target/PowerPC/PPCScheduleG5.td1
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp62
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h8
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp27
-rw-r--r--lib/Target/PowerPC/README.txt1
-rw-r--r--lib/Target/PowerPC/TargetInfo/Makefile2
37 files changed, 1669 insertions, 425 deletions
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index bcd8bd291623..192d18d66440 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_target(PowerPCCodeGen
PPCAsmPrinter.cpp
PPCBranchSelector.cpp
PPCCodeEmitter.cpp
+ PPCCTRLoops.cpp
PPCHazardRecognizers.cpp
PPCInstrInfo.cpp
PPCISelDAGToDAG.cpp
@@ -28,6 +29,8 @@ add_llvm_target(PowerPCCodeGen
PPCSelectionDAGInfo.cpp
)
+add_dependencies(LLVMPowerPCCodeGen intrinsics_gen)
+
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 61d23ce06aa1..d175e3e79eb6 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -86,8 +86,33 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O,
const char *Modifier) {
- assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
unsigned Code = MI->getOperand(OpNo).getImm();
+ if (!Modifier) {
+ unsigned CCReg = MI->getOperand(OpNo+1).getReg();
+ unsigned RegNo;
+ switch (CCReg) {
+ default: llvm_unreachable("Unknown CR register");
+ case PPC::CR0: RegNo = 0; break;
+ case PPC::CR1: RegNo = 1; break;
+ case PPC::CR2: RegNo = 2; break;
+ case PPC::CR3: RegNo = 3; break;
+ case PPC::CR4: RegNo = 4; break;
+ case PPC::CR5: RegNo = 5; break;
+ case PPC::CR6: RegNo = 6; break;
+ case PPC::CR7: RegNo = 7; break;
+ }
+
+ // Print the CR bit number. The Code is ((BI << 5) | BO) for a
+ // BCC, but we must have the positive form here (BO == 12)
+ unsigned BI = Code >> 5;
+ assert((Code & 0xF) == 12 &&
+ "BO in predicate bit must have the positive form");
+
+ unsigned Value = 4*RegNo + BI;
+ O << Value;
+ return;
+ }
+
if (StringRef(Modifier) == "cc") {
switch ((PPC::Predicate)Code) {
case PPC::PRED_ALWAYS: return; // Don't print anything for always.
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 73fd5342a165..8f1e211c3e96 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -42,7 +42,7 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier);
+ raw_ostream &O, const char *Modifier = 0);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 5a6827ffd8d3..f6524222fd79 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -77,6 +77,7 @@ public:
} // end anonymous namespace
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new PPCMCCodeEmitter(MCII, STI, Ctx);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index b7fa0646288d..7162e158f033 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -22,6 +22,7 @@ class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCObjectWriter;
+class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
@@ -31,6 +32,7 @@ extern Target ThePPC32Target;
extern Target ThePPC64Target;
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 24a7178d1ff9..9103e1232505 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -30,6 +30,7 @@ namespace llvm {
class AsmPrinter;
class MCInst;
+ FunctionPass *createPPCCTRLoops();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
@@ -50,21 +51,27 @@ namespace llvm {
/// and jumps to external functions on Tiger and earlier.
MO_DARWIN_STUB = 1,
- /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
- MO_LO16 = 4, MO_HA16 = 8,
-
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
/// the function's picbase, e.g. lo16(symbol-picbase).
- MO_PIC_FLAG = 16,
+ MO_PIC_FLAG = 4,
/// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
/// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
- MO_NLP_FLAG = 32,
+ MO_NLP_FLAG = 8,
/// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
/// symbol with hidden visibility. This causes a different kind of
/// non-lazy-pointer to be generated.
- MO_NLP_HIDDEN_FLAG = 64
+ MO_NLP_HIDDEN_FLAG = 16,
+
+ /// The next are not flags but distinct values.
+ MO_ACCESS_MASK = 224,
+
+ /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
+ MO_LO16 = 32, MO_HA16 = 64,
+
+ MO_TPREL16_HA = 96,
+ MO_TPREL16_LO = 128
};
} // end namespace PPCII
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index c554d39434c8..b7f16884363a 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -35,6 +35,8 @@ def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
+def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
+def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
@@ -42,12 +44,14 @@ def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
"Enable 64-bit registers usage for ppc32 [beta]">;
def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
"Enable Altivec instructions">;
-def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true",
- "Enable GPUL instructions">;
+def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
+ "Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
"Enable the stfiwx instruction">;
+def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
+ "Enable the isel instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
@@ -64,8 +68,10 @@ include "PPCInstrInfo.td"
//
def : Processor<"generic", G3Itineraries, [Directive32]>;
-def : Processor<"440", PPC440Itineraries, [Directive440, FeatureBookE]>;
-def : Processor<"450", PPC440Itineraries, [Directive440, FeatureBookE]>;
+def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureBookE]>;
+def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureBookE]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603]>;
@@ -74,28 +80,37 @@ def : Processor<"603ev", G3Itineraries, [Directive603]>;
def : Processor<"604", G3Itineraries, [Directive604]>;
def : Processor<"604e", G3Itineraries, [Directive604]>;
def : Processor<"620", G3Itineraries, [Directive620]>;
-def : Processor<"g3", G3Itineraries, [Directive7400]>;
+def : Processor<"750", G4Itineraries, [Directive750]>;
+def : Processor<"g3", G3Itineraries, [Directive750]>;
def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"g4+", G4PlusItineraries, [Directive750, FeatureAltivec]>;
-def : Processor<"750", G4Itineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
def : Processor<"970", G5Itineraries,
[Directive970, FeatureAltivec,
- FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"g5", G5Itineraries,
[Directive970, FeatureAltivec,
- FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
-def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
- FeatureFSqrt, FeatureSTFIWX,
- Feature64Bit
- /*, Feature64BitRegs */]>;
+def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
+ FeatureMFOCRF, FeatureFSqrt,
+ FeatureSTFIWX, FeatureISEL,
+ Feature64Bit
+ /*, Feature64BitRegs */]>;
+def : Processor<"pwr6", G5Itineraries,
+ [DirectivePwr6, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"pwr7", G5Itineraries,
+ [DirectivePwr7, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : Processor<"ppc64", G5Itineraries,
[Directive64, FeatureAltivec,
- FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index fb7aa71d98d3..f76b89c803ab 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -22,8 +22,8 @@
#include "PPCSubtarget.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Assembly/Writer.h"
@@ -248,7 +248,9 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'c': // Don't print "$" before a global var name or constant.
break; // PPC never has a prefix.
case 'L': // Write second word of DImode reference.
@@ -451,11 +453,13 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
"ppc750",
"ppc970",
"ppcA2",
+ "power6",
+ "power7",
"ppc64"
};
unsigned Directive = Subtarget.getDarwinDirective();
- if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970)
+ if (Subtarget.hasMFOCRF() && Directive < PPC::DIR_970)
Directive = PPC::DIR_970;
if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
Directive = PPC::DIR_7400;
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 5f775e16f1ca..21a0fb200f20 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -135,21 +135,33 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
MBBStartOffset += 4;
continue;
}
-
+
// Otherwise, we have to expand it to a long branch.
- // The BCC operands are:
- // 0. PPC branch predicate
- // 1. CR register
- // 2. Target MBB
- PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
- unsigned CRReg = I->getOperand(1).getReg();
-
MachineInstr *OldBranch = I;
DebugLoc dl = OldBranch->getDebugLoc();
-
- // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
- BuildMI(MBB, I, dl, TII->get(PPC::BCC))
- .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+
+ if (I->getOpcode() == PPC::BCC) {
+ // The BCC operands are:
+ // 0. PPC branch predicate
+ // 1. CR register
+ // 2. Target MBB
+ PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
+ unsigned CRReg = I->getOperand(1).getReg();
+
+ // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
+ BuildMI(MBB, I, dl, TII->get(PPC::BCC))
+ .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+ } else if (I->getOpcode() == PPC::BDNZ) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDNZ8) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDZ) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDZ8) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2);
+ } else {
+ llvm_unreachable("Unhandled branch type!");
+ }
// Uncond branch to the real destination.
I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
new file mode 100644
index 000000000000..2a2abb171fb1
--- /dev/null
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -0,0 +1,724 @@
+//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the PPC branch instructions
+// that decrement and test the count register (CTR) (bdnz and friends).
+// This pass is based on the HexagonHardwareLoops pass.
+//
+// The pattern that defines the induction variable can change depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for CTR loops:
+// - Countable loops (w/ ind. var for a trip count)
+// - Assumes loops are normalized by IndVarSimplify
+// - Try inner-most loops first
+// - No nested CTR loops.
+// - No function calls in loops.
+//
+// Note: As with unconverted loops, PPCBranchSelector must be run after this
+// pass in order to convert long-displacement jumps into jump pairs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ctrloops"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
+
+namespace {
+ class CountValue;
+ struct PPCCTRLoops : public MachineFunctionPass {
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+ PPCCTRLoops() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "PPC CTR Loops"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// getCanonicalInductionVariable - Check to see if the loop has a canonical
+ /// induction variable.
+ /// Should be defined in MachineLoop. Based upon version in class Loop.
+ void getCanonicalInductionVariable(MachineLoop *L,
+ SmallVector<MachineInstr *, 4> &IVars,
+ SmallVector<MachineInstr *, 4> &IOps) const;
+
+ /// getTripCount - Return a loop-invariant LLVM register indicating the
+ /// number of times the loop will be executed. If the trip-count cannot
+ /// be determined, this returns null.
+ CountValue *getTripCount(MachineLoop *L,
+ SmallVector<MachineInstr *, 2> &OldInsts) const;
+
+ /// isInductionOperation - Return true if the instruction matches the
+ /// pattern for an operation that defines an induction variable.
+ bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
+
+ /// isInvalidLoopOperation - Return true if the instruction is invalid within
+ /// a CTR loop.
+ bool isInvalidLoopOperation(const MachineInstr *MI) const;
+
+ /// containsInvalidInstruction - Return true if the loop contains an
+ /// instruction that inhibits using the CTR loop.
+ bool containsInvalidInstruction(MachineLoop *L) const;
+
+ /// convertToCTRLoop - Given a loop, check if we can convert it to a
+ /// CTR loop. If so, then perform the conversion and return true.
+ bool convertToCTRLoop(MachineLoop *L);
+
+ /// isDead - Return true if the instruction is now dead.
+ bool isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr *, 1> &DeadPhis) const;
+
+ /// removeIfDead - Remove the instruction if it is now dead.
+ void removeIfDead(MachineInstr *MI);
+ };
+
+ char PPCCTRLoops::ID = 0;
+
+
+ // CountValue class - Abstraction for a trip count of a loop. A
+ // smaller version of the MachineOperand class without the concerns
+ // of changing the operand representation.
+ class CountValue {
+ public:
+ enum CountValueType {
+ CV_Register,
+ CV_Immediate
+ };
+ private:
+ CountValueType Kind;
+ union Values {
+ unsigned RegNum;
+ int64_t ImmVal;
+ Values(unsigned r) : RegNum(r) {}
+ Values(int64_t i) : ImmVal(i) {}
+ } Contents;
+ bool isNegative;
+
+ public:
+ CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
+ isNegative(neg) {}
+ explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
+ isNegative(i < 0) {}
+ CountValueType getType() const { return Kind; }
+ bool isReg() const { return Kind == CV_Register; }
+ bool isImm() const { return Kind == CV_Immediate; }
+ bool isNeg() const { return isNegative; }
+
+ unsigned getReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.RegNum;
+ }
+ void setReg(unsigned Val) {
+ Contents.RegNum = Val;
+ }
+ int64_t getImm() const {
+ assert(isImm() && "Wrong CountValue accessor");
+ if (isNegative) {
+ return -Contents.ImmVal;
+ }
+ return Contents.ImmVal;
+ }
+ void setImm(int64_t Val) {
+ Contents.ImmVal = Val;
+ }
+
+ void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
+ if (isReg()) { OS << PrintReg(getReg()); }
+ if (isImm()) { OS << getImm(); }
+ }
+ };
+} // end anonymous namespace
+
+
+/// isCompareEqualsImm - Returns true if the instruction is a compare equals
+/// instruction with an immediate operand.
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
+ if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
+ SignedCmp = true;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
+ SignedCmp = false;
+ return true;
+ }
+
+ return false;
+}
+
+
+/// createPPCCTRLoops - Factory for creating
+/// the CTR loop phase.
+FunctionPass *llvm::createPPCCTRLoops() {
+ return new PPCCTRLoops();
+}
+
+
+bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********* PPC CTR Loops *********\n");
+
+ bool Changed = false;
+
+ // get the loop information
+ MLI = &getAnalysis<MachineLoopInfo>();
+ // get the register information
+ MRI = &MF.getRegInfo();
+ // the target specific instruction info.
+ TII = MF.getTarget().getInstrInfo();
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I) {
+ MachineLoop *L = *I;
+ if (!L->getParentLoop()) {
+ Changed |= convertToCTRLoop(L);
+ }
+ }
+
+ return Changed;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable. We check for a simple recurrence pattern - an
+/// integer recurrence that decrements by one each time through the loop and
+/// ends at zero. If so, return the phi node that corresponds to it.
+///
+/// Based upon the similar code in LoopInfo except this code is specific to
+/// the machine.
+/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+///
+void
+PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L,
+ SmallVector<MachineInstr *, 4> &IVars,
+ SmallVector<MachineInstr *, 4> &IOps) const {
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+ assert(PI != TopMBB->pred_end() &&
+ "Loop must have more than one incoming edge!");
+ MachineBasicBlock *Backedge = *PI++;
+ if (PI == TopMBB->pred_end()) return; // dead loop
+ MachineBasicBlock *Incoming = *PI++;
+ if (PI != TopMBB->pred_end()) return; // multiple backedges?
+
+ // make sure there is one incoming and one backedge and determine which
+ // is which.
+ if (L->contains(Incoming)) {
+ if (L->contains(Backedge))
+ return;
+ std::swap(Incoming, Backedge);
+ } else if (!L->contains(Backedge))
+ return;
+
+ // Loop over all of the PHI nodes, looking for a canonical induction variable:
+ // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
+ // - The recurrence comes from the backedge.
+ // - the definition is an induction operation.
+ for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *MPhi = &*I;
+ unsigned DefReg = MPhi->getOperand(0).getReg();
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ // Check each operand for the value from the backedge.
+ MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
+ if (L->contains(MBB)) { // operands comes from the backedge
+ // Check if the definition is an induction operation.
+ MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
+ if (isInductionOperation(DI, DefReg)) {
+ IOps.push_back(DI);
+ IVars.push_back(MPhi);
+ }
+ }
+ }
+ }
+ return;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the
+/// number of times the loop will be executed. The trip count can
+/// be either a register or a constant value. If the trip-count
+/// cannot be determined, this returns null.
+///
+/// We find the trip count from the phi instruction that defines the
+/// induction variable. We follow the links to the CMP instruction
+/// to get the trip count.
+///
+/// Based upon getTripCount in LoopInfo.
+///
+CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
+ SmallVector<MachineInstr *, 2> &OldInsts) const {
+ MachineBasicBlock *LastMBB = L->getExitingBlock();
+ // Don't generate a CTR loop if the loop has more than one exit.
+ if (LastMBB == 0)
+ return 0;
+
+ MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+ if (LastI->getOpcode() != PPC::BCC)
+ return 0;
+
+ // We need to make sure that this compare is defining the condition
+ // register actually used by the terminating branch.
+
+ unsigned PredReg = LastI->getOperand(1).getReg();
+ DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);
+
+ unsigned PredCond = LastI->getOperand(0).getImm();
+ if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
+ return 0;
+
+ // Check that the loop has an induction variable.
+ SmallVector<MachineInstr *, 4> IVars, IOps;
+ getCanonicalInductionVariable(L, IVars, IOps);
+ for (unsigned i = 0; i < IVars.size(); ++i) {
+ MachineInstr *IOp = IOps[i];
+ MachineInstr *IV_Inst = IVars[i];
+
+ // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
+ // if Imm is 0, get the count from the PHI opnd
+ // if Imm is -M, then M is the count
+ // Otherwise, Imm is the count
+ MachineOperand *IV_Opnd;
+ const MachineOperand *InitialValue;
+ if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
+ InitialValue = &IV_Inst->getOperand(1);
+ IV_Opnd = &IV_Inst->getOperand(3);
+ } else {
+ InitialValue = &IV_Inst->getOperand(3);
+ IV_Opnd = &IV_Inst->getOperand(1);
+ }
+
+ DEBUG(dbgs() << "Considering:\n");
+ DEBUG(dbgs() << " induction operation: " << *IOp);
+ DEBUG(dbgs() << " induction variable: " << *IV_Inst);
+ DEBUG(dbgs() << " initial value: " << *InitialValue << "\n");
+
+ // Look for the cmp instruction to determine if we
+ // can get a useful trip count. The trip count can
+ // be either a register or an immediate. The location
+ // of the value depends upon the type (reg or imm).
+ for (MachineRegisterInfo::reg_iterator
+ RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
+ RI != RE; ++RI) {
+ IV_Opnd = &RI.getOperand();
+ bool SignedCmp;
+ MachineInstr *MI = IV_Opnd->getParent();
+ if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
+ MI->getOperand(0).getReg() == PredReg) {
+
+ OldInsts.push_back(MI);
+ OldInsts.push_back(IOp);
+
+ DEBUG(dbgs() << " compare: " << *MI);
+
+ const MachineOperand &MO = MI->getOperand(2);
+ assert(MO.isImm() && "IV Cmp Operand should be an immediate");
+
+ int64_t ImmVal;
+ if (SignedCmp)
+ ImmVal = (short) MO.getImm();
+ else
+ ImmVal = MO.getImm();
+
+ const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
+ assert(L->contains(IV_DefInstr->getParent()) &&
+ "IV definition should occurs in loop");
+ int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();
+
+ assert(InitialValue->isReg() && "Expecting register for init value");
+ unsigned InitialValueReg = InitialValue->getReg();
+
+ const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+
+ // Here we need to look for an immediate load (an li or lis/ori pair).
+ if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
+ DefInstr->getOpcode() == PPC::ORI)) {
+ int64_t start = (short) DefInstr->getOperand(2).getImm();
+ const MachineInstr *DefInstr2 =
+ MRI->getVRegDef(DefInstr->getOperand(0).getReg());
+ if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
+ DefInstr2->getOpcode() == PPC::LIS)) {
+ DEBUG(dbgs() << " initial constant: " << *DefInstr);
+ DEBUG(dbgs() << " initial constant: " << *DefInstr2);
+
+ start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;
+
+ int64_t count = ImmVal - start;
+ if ((count % iv_value) != 0) {
+ return 0;
+ }
+ return new CountValue(count/iv_value);
+ }
+ } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
+ DefInstr->getOpcode() == PPC::LI)) {
+ DEBUG(dbgs() << " initial constant: " << *DefInstr);
+
+ int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
+ if ((count % iv_value) != 0) {
+ return 0;
+ }
+ return new CountValue(count/iv_value);
+ } else if (iv_value == 1 || iv_value == -1) {
+ // We can't determine a constant starting value.
+ if (ImmVal == 0) {
+ return new CountValue(InitialValueReg, iv_value > 0);
+ }
+ // FIXME: handle non-zero end value.
+ }
+ // FIXME: handle non-unit increments (we might not want to introduce division
+ // but we can handle some 2^n cases with shifts).
+
+ }
+ }
+ }
+ return 0;
+}
+
+/// isInductionOperation - return true if the operation matches the
+/// pattern that defines an induction variable:
+/// addi iv, c
+///
+bool
+PPCCTRLoops::isInductionOperation(const MachineInstr *MI,
+ unsigned IVReg) const {
+ return ((MI->getOpcode() == PPC::ADDI || MI->getOpcode() == PPC::ADDI8) &&
+ MI->getOperand(1).isReg() && // could be a frame index instead
+ MI->getOperand(1).getReg() == IVReg);
+}
+
+/// isInvalidLoopOperation - Return true if the operation is invalid within
+/// a CTR loop.
+bool
+PPCCTRLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+ // call is not allowed because the callee may use a CTR loop
+ if (MI->getDesc().isCall()) {
+ return true;
+ }
+ // check if the instruction defines a CTR loop register
+ // (this will also catch nested CTR loops)
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// containsInvalidInstruction - Return true if the loop contains
+/// an instruction that inhibits the use of the CTR loop function.
+///
+bool PPCCTRLoops::containsInvalidInstruction(MachineLoop *L) const {
+ const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
+ const MachineInstr *MI = &*MII;
+ if (isInvalidLoopOperation(MI)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// isDead - Return true if each register MI defines has no non-debug uses, or
+/// is used only by one PHI whose own results are used only by MI (such PHIs are
+/// appended to DeadPhis). Adapted from DeadMachineInstructionElim::isDead, minus
+/// its special cases for inline asm, physical registers and side effects.
+bool PPCCTRLoops::isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr *, 1> &DeadPhis) const {
+ // Examine each register def operand of MI.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (!MRI->use_nodbg_empty(Reg)) {
+ // This instruction has users, but if the only user is a phi node, and
+ // the only user of that phi node's result is this instruction, then
+ // this instruction is dead: both it and the phi node can be removed.
+ MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
+ if (llvm::next(I) == MRI->use_end() &&
+ I.getOperand().getParent()->isPHI()) {
+ MachineInstr *OnePhi = I.getOperand().getParent();
+
+ for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
+ const MachineOperand &OPO = OnePhi->getOperand(j);
+ if (OPO.isReg() && OPO.isDef()) {
+ unsigned OPReg = OPO.getReg();
+
+ MachineRegisterInfo::use_iterator nextJ;
+ for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg),
+ E = MRI->use_end(); J!=E; J=nextJ) {
+ nextJ = llvm::next(J);
+ MachineOperand& Use = J.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+
+ if (MI != UseMI) {
+ // The phi node has a user that is not MI, so MI is live: bail out.
+ return false;
+ }
+ }
+ }
+ }
+
+ DeadPhis.push_back(OnePhi);
+ } else {
+ // More than one use, or the sole use is not a PHI: keep the instruction.
+ return false;
+ }
+ }
+ }
+ }
+
+ // Every def is unused or feeds only a PHI recorded in DeadPhis: MI is dead.
+ return true;
+}
+
+void PPCCTRLoops::removeIfDead(MachineInstr *MI) {
+ // Erase MI and the PHIs isDead collects (cf. DeadMachineInstructionElim).
+
+ SmallVector<MachineInstr *, 1> DeadPhis;
+ if (isDead(MI, DeadPhis)) {
+ DEBUG(dbgs() << "CTR looping will remove: " << *MI);
+
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I!=E; I=nextI) {
+ nextI = llvm::next(I); // I is invalidated by the setReg
+ MachineOperand& Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ if (UseMI==MI)
+ continue;
+ if (Use.isDebug()) // non-debug uses are the instr -> phi -> instr case
+ // accepted by isDead; that phi is erased below.
+ UseMI->getOperand(0).setReg(0U);
+ }
+ }
+
+ MI->eraseFromParent();
+ for (unsigned i = 0; i < DeadPhis.size(); ++i) {
+ DeadPhis[i]->eraseFromParent();
+ }
+ }
+}
+
+/// convertToCTRLoop - check if the loop is a candidate for
+/// converting to a CTR loop.  If so, then perform the
+/// transformation.
+///
+/// This function works on innermost loops first.  A loop can
+/// be converted if it is a counting loop; either a register
+/// value or an immediate.
+///
+/// The code makes several assumptions about the representation
+/// of the loop in llvm.
+///
+/// Returns true if L, or one of the loops nested inside it, was converted.
+bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
+  bool Changed = false;
+  // Process nested loops first.
+  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+    Changed |= convertToCTRLoop(*I);
+  }
+  // If a nested loop has been converted, then we can't convert this loop.
+  if (Changed) {
+    return Changed;
+  }
+
+  SmallVector<MachineInstr *, 2> OldInsts;
+  // Are we able to determine the trip count for the loop?
+  CountValue *TripCount = getTripCount(L, OldInsts);
+  if (TripCount == 0) {
+    DEBUG(dbgs() << "failed to get trip count!\n");
+    return false;
+  }
+  // TripCount is owned by this function from here on, so every exit path
+  // below has to release it.
+
+  // Does the loop contain any invalid instructions?
+  if (containsInvalidInstruction(L)) {
+    delete TripCount;
+    return false;
+  }
+  MachineBasicBlock *Preheader = L->getLoopPreheader();
+  // No preheader means there's no place for the loop instr.
+  if (Preheader == 0) {
+    delete TripCount;
+    return false;
+  }
+  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+  DebugLoc dl;
+  if (InsertPos != Preheader->end())
+    dl = InsertPos->getDebugLoc();
+
+  MachineBasicBlock *LastMBB = L->getExitingBlock();
+  // Don't generate CTR loop if the loop has more than one exit.
+  if (LastMBB == 0) {
+    delete TripCount;
+    return false;
+  }
+  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+
+  // Determine the loop start.
+  MachineBasicBlock *LoopStart = L->getTopBlock();
+  if (L->getLoopLatch() != LastMBB) {
+    // When the exit and latch are not the same, use the latch block as the
+    // start.
+    // The loop start address is used only after the 1st iteration, and the
+    // loop latch may contain instrs. that need to be executed after the 1st
+    // iter.
+    LoopStart = L->getLoopLatch();
+    // Make sure the latch is a successor of the exit, otherwise it won't work.
+    if (!LastMBB->isSuccessor(LoopStart)) {
+      delete TripCount;
+      return false;
+    }
+  }
+
+  // Convert the loop to a CTR loop
+  DEBUG(dbgs() << "Change to CTR loop at "; L->dump());
+
+  MachineFunction *MF = LastMBB->getParent();
+  const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>();
+  bool isPPC64 = Subtarget.isPPC64();
+
+  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+  const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
+
+  unsigned CountReg;
+  if (TripCount->isReg()) {
+    // Create a copy of the loop count register.
+    const TargetRegisterClass *SrcRC =
+      MF->getRegInfo().getRegClass(TripCount->getReg());
+    CountReg = MF->getRegInfo().createVirtualRegister(RC);
+    // A 32-bit count feeding a 64-bit counter is sign-extended, not copied.
+    unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ?
+                        (unsigned) PPC::EXTSW_32_64 :
+                        (unsigned) TargetOpcode::COPY;
+    BuildMI(*Preheader, InsertPos, dl,
+            TII->get(CopyOp), CountReg).addReg(TripCount->getReg());
+    if (TripCount->isNeg()) {
+      unsigned CountReg1 = CountReg;
+      CountReg = MF->getRegInfo().createVirtualRegister(RC);
+      BuildMI(*Preheader, InsertPos, dl,
+              TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG),
+              CountReg).addReg(CountReg1);
+    }
+  } else {
+    assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
+    // Put the trip count in a register for transfer into the count register.
+
+    int64_t CountImm = TripCount->getImm();
+    assert(!TripCount->isNeg() && "Constant trip count must be positive");
+
+    CountReg = MF->getRegInfo().createVirtualRegister(RC);
+    // LI/LI8 sign-extend their 16-bit immediate, so only counts up to 0x7FFF
+    // can be loaded with a single instruction; 0x8000..0xFFFF would otherwise
+    // come out as a huge (negative) count.  Larger values are built in two
+    // steps: LIS for the high half, then ORI for the zero-extended low half.
+    // NOTE(review): on PPC64, LIS8 also sign-extends, so counts >= 2^31 are
+    // presumably still not materialized correctly here -- confirm.
+    if (CountImm > 0x7FFF) {
+      BuildMI(*Preheader, InsertPos, dl,
+              TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
+              CountReg).addImm(CountImm >> 16);
+      unsigned CountReg1 = CountReg;
+      CountReg = MF->getRegInfo().createVirtualRegister(RC);
+      BuildMI(*Preheader, InsertPos, dl,
+              TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
+              CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF);
+    } else {
+      BuildMI(*Preheader, InsertPos, dl,
+              TII->get(isPPC64 ? PPC::LI8 : PPC::LI),
+              CountReg).addImm(CountImm);
+    }
+  }
+
+  // Add the mtctr instruction to the beginning of the loop.
+  BuildMI(*Preheader, InsertPos, dl,
+          TII->get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(CountReg,
+            TripCount->isImm() ? RegState::Kill : 0);
+
+  // Make sure the loop start always has a reference in the CFG.  We need to
+  // create a BlockAddress operand to get this mechanism to work both the
+  // MachineBasicBlock and BasicBlock objects need the flag set.
+  LoopStart->setHasAddressTaken();
+  // This line is needed to set the hasAddressTaken flag on the BasicBlock
+  // object
+  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+  // Replace the loop branch with a bdnz instruction.
+  dl = LastI->getDebugLoc();
+  // Mark the counter register live-in to every block of the loop (bound by
+  // reference: no copy of the block list is needed).
+  const std::vector<MachineBasicBlock*> &Blocks = L->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = Blocks[i];
+    if (MBB != Preheader)
+      MBB->addLiveIn(isPPC64 ? PPC::CTR8 : PPC::CTR);
+  }
+
+  // The loop ends with either:
+  //  - a conditional branch followed by an unconditional branch, or
+  //  - a conditional branch to the loop start.
+  assert(LastI->getOpcode() == PPC::BCC &&
+         "loop end must start with a BCC instruction");
+  // Either the BCC branches to the beginning of the loop, or it
+  // branches out of the loop and there is an unconditional branch
+  // to the start of the loop.
+  MachineBasicBlock *BranchTarget = LastI->getOperand(2).getMBB();
+  BuildMI(*LastMBB, LastI, dl,
+          TII->get((BranchTarget == LoopStart) ?
+                   (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+                   (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget);
+
+  // Conditional branch; just delete it.
+  DEBUG(dbgs() << "Removing old branch: " << *LastI);
+  LastMBB->erase(LastI);
+
+  delete TripCount;
+
+  // The induction operation (add) and the comparison (cmpwi) may now be
+  // unneeded. If these are unneeded, then remove them.
+  for (unsigned i = 0; i < OldInsts.size(); ++i)
+    removeIfDead(OldInsts[i]);
+
+  ++NumCTRLoops;
+  return true;
+}
+
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index b77a80bbf30d..c24afa908d69 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -330,6 +330,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
if (HasFP)
+ // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative
+ // offsets of R1 is not allowed.
BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
.addReg(PPC::R31)
.addImm(FPOffset)
@@ -366,9 +368,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
.addReg(PPC::R0, RegState::Kill)
.addImm(NegFrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
.addReg(PPC::R1, RegState::Kill)
- .addReg(PPC::R1, RegState::Define)
+ .addReg(PPC::R1)
.addReg(PPC::R0);
} else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
@@ -381,9 +383,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
.addReg(PPC::R0, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
.addReg(PPC::R1, RegState::Kill)
- .addReg(PPC::R1, RegState::Define)
+ .addReg(PPC::R1)
.addReg(PPC::R0);
}
} else { // PPC64.
@@ -399,9 +401,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
.addReg(PPC::X0)
.addImm(NegFrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(PPC::X1, RegState::Kill)
- .addReg(PPC::X1, RegState::Define)
+ .addReg(PPC::X1)
.addReg(PPC::X0);
} else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
@@ -414,9 +416,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
.addReg(PPC::X0, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(PPC::X1, RegState::Kill)
- .addReg(PPC::X1, RegState::Define)
+ .addReg(PPC::X1)
.addReg(PPC::X0);
}
}
@@ -492,7 +494,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
// subregisters of CR2. We just need to emit a move of CR2.
- if (PPC::CRBITRCRegisterClass->contains(Reg))
+ if (PPC::CRBITRCRegClass.contains(Reg))
continue;
MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
@@ -817,7 +819,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- if (PPC::GPRCRegisterClass->contains(Reg)) {
+ if (PPC::GPRCRegClass.contains(Reg)) {
HasGPSaveArea = true;
GPRegs.push_back(CSI[i]);
@@ -825,7 +827,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (Reg < MinGPR) {
MinGPR = Reg;
}
- } else if (PPC::G8RCRegisterClass->contains(Reg)) {
+ } else if (PPC::G8RCRegClass.contains(Reg)) {
HasG8SaveArea = true;
G8Regs.push_back(CSI[i]);
@@ -833,7 +835,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (Reg < MinG8R) {
MinG8R = Reg;
}
- } else if (PPC::F8RCRegisterClass->contains(Reg)) {
+ } else if (PPC::F8RCRegClass.contains(Reg)) {
HasFPSaveArea = true;
FPRegs.push_back(CSI[i]);
@@ -842,12 +844,12 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
MinFPR = Reg;
}
// FIXME SVR4: Disable CR save area for now.
- } else if (PPC::CRBITRCRegisterClass->contains(Reg)
- || PPC::CRRCRegisterClass->contains(Reg)) {
+ } else if (PPC::CRBITRCRegClass.contains(Reg) ||
+ PPC::CRRCRegClass.contains(Reg)) {
// HasCRSaveArea = true;
- } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+ } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
HasVRSAVESaveArea = true;
- } else if (PPC::VRRCRegisterClass->contains(Reg)) {
+ } else if (PPC::VRRCRegClass.contains(Reg)) {
HasVRSaveArea = true;
VRegs.push_back(CSI[i]);
@@ -932,8 +934,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- if (PPC::CRBITRCRegisterClass->contains(Reg) ||
- PPC::CRRCRegisterClass->contains(Reg)) {
+ if (PPC::CRBITRCRegClass.contains(Reg) ||
+ PPC::CRRCRegClass.contains(Reg)) {
int FI = CSI[i].getFrameIdx();
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
@@ -950,7 +952,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+ if (PPC::VRSAVERCRegClass.contains(Reg)) {
int FI = CSI[i].getFrameIdx();
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 5a04888dd45b..a00f686adce1 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -111,6 +111,23 @@ namespace {
/// immediate field. Because preinc imms have already been validated, just
/// accept it.
bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
+ if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
+ N.getOpcode() == ISD::TargetGlobalAddress) {
+ Out = N;
+ return true;
+ }
+
+ return false;
+ }
+
+ /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc
+ /// index field, i.e. it is none of the immediate forms (constant, PPCISD::Lo
+ /// or TargetGlobalAddress) that SelectAddrImmOffs accepts.
+ bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const {
+ if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
+ N.getOpcode() == ISD::TargetGlobalAddress)
+ return false;
+
Out = N;
return true;
}
@@ -238,11 +255,11 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
DebugLoc dl;
if (PPCLowering.getPointerTy() == MVT::i32) {
- GlobalBaseReg = RegInfo->createVirtualRegister(PPC::GPRCRegisterClass);
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
} else {
- GlobalBaseReg = RegInfo->createVirtualRegister(PPC::G8RCRegisterClass);
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RCRegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
}
@@ -697,7 +714,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
InFlag).getValue(1);
- if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
+ if (PPCSubTarget.hasMFOCRF() && OtherCondIdx == -1)
IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
CCReg), 0);
else
@@ -833,7 +850,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
case PPCISD::MFCR: {
SDValue InFlag = N->getOperand(1);
// Use MFOCRF if supported.
- if (PPCSubTarget.isGigaProcessor())
+ if (PPCSubTarget.hasMFOCRF())
return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
N->getOperand(0), InFlag);
else
@@ -915,12 +932,44 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
- // FIXME: PPC64
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering.getPointerTy(),
MVT::Other, Ops, 3);
} else {
- llvm_unreachable("R+R preindex loads not supported yet!");
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDUX; break;
+ case MVT::f32: Opcode = PPC::LFSUX; break;
+ case MVT::i32: Opcode = PPC::LWZUX; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZUX; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
+ "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDUX; break;
+ case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZUX8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Offset, Base, Chain };
+ return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering.getPointerTy(),
+ MVT::Other, Ops, 3);
}
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 3b24951d1dc9..aa819eeb30a2 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -51,9 +51,11 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
-cl::desc("enable preincrement load/store generation on PPC (experimental)"),
- cl::Hidden);
+static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
+cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
+
+static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
+cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
@@ -64,6 +66,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
+ const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
setPow2DivIsCheap();
@@ -73,12 +76,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
- setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);
+ bool isPPC64 = Subtarget->isPPC64();
+ setMinStackArgumentAlignment(isPPC64 ? 8:4);
// Set up the register classes.
- addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
- addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
- addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
+ addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
+ addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
+ addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
@@ -130,17 +134,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
- setOperationAction(ISD::FMA , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
- setOperationAction(ISD::FMA , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Legal);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
+ if (!Subtarget->hasFSQRT()) {
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
}
@@ -226,8 +230,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
- if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ if (Subtarget->isSVR4ABI()) {
+ if (isPPC64) {
// VAARG always uses double-word chunks, so promote anything smaller.
setOperationAction(ISD::VAARG, MVT::i1, Promote);
AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
@@ -271,7 +275,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
- if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ if (Subtarget->has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
@@ -290,9 +294,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
- if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
+ if (Subtarget->use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
- addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
+ addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// 64-bit PowerPC wants to expand i128 shifts itself.
@@ -306,7 +310,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
- if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
+ if (Subtarget->hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -370,12 +374,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
- addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
- addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
- addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
- addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -389,8 +394,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
}
- if (TM.getSubtarget<PPCSubtarget>().has64BitSupport())
+ if (Subtarget->has64BitSupport()) {
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
+ }
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
@@ -398,7 +405,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ if (isPPC64) {
setStackPointerRegisterToSaveRestore(PPC::X1);
setExceptionPointerRegister(PPC::X3);
setExceptionSelectorRegister(PPC::X4);
@@ -415,7 +422,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::BSWAP);
// Darwin long double math library functions have $LDBL128 appended.
- if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ if (Subtarget->isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
@@ -432,6 +439,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
if (PPCSubTarget.isDarwin())
setPrefFunctionAlignment(4);
+ if (isPPC64 && Subtarget->isJITCodeModel())
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ setSupportJumpTables(false);
+
setInsertFencesForAtomic(true);
setSchedulingPreference(Sched::Hybrid);
@@ -902,10 +914,11 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
return true; // [r+i]
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
// Match LOAD (ADD (X, Lo(G))).
- assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
&& "Cannot handle constant offsets yet!");
Disp = N.getOperand(1).getOperand(0); // The global address.
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
Disp.getOpcode() == ISD::TargetConstantPool ||
Disp.getOpcode() == ISD::TargetJumpTable);
Base = N.getOperand(0);
@@ -1006,7 +1019,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
if (N.getOpcode() == ISD::ADD) {
short imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
- Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
} else {
@@ -1015,7 +1028,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
return true; // [r+i]
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
// Match LOAD (ADD (X, Lo(G))).
- assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
&& "Cannot handle constant offsets yet!");
Disp = N.getOperand(1).getOperand(0); // The global address.
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
@@ -1084,8 +1097,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
- // Disabled by default for now.
- if (!EnablePPCPreinc) return false;
+ if (DisablePPCPreinc) return false;
SDValue Ptr;
EVT VT;
@@ -1103,7 +1115,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (VT.isVector())
return false;
- // TODO: Check reg+reg first.
+ if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
+ AM = ISD::PRE_INC;
+ return true;
+ }
// LDU/STU use reg+imm*4, others use reg+imm.
if (VT != MVT::i64) {
@@ -1222,6 +1237,30 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
+/// LowerGlobalTLSAddress - Lower a global TLS address node.  Only the
+/// local-exec TLS model is handled; any other model is llvm_unreachable.
+/// The result is a PPCISD::Lo node combining the MO_TPREL16_LO address with
+/// a PPCISD::Hi node built from the MO_TPREL16_HA address and the TLS
+/// register (X13 on PPC64, R2 otherwise).
+SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  GlobalAddressSDNode *GASDN = cast<GlobalAddressSDNode>(Op);
+  const GlobalValue *Global = GASDN->getGlobal();
+
+  // Reject unsupported TLS models before building any nodes.
+  if (getTargetMachine().getTLSModel(Global) != TLSModel::LocalExec)
+    llvm_unreachable("only local-exec TLS mode supported");
+
+  DebugLoc Loc = GASDN->getDebugLoc();
+  EVT PointerVT = getPointerTy();
+
+  // The same global is referenced twice: once for the high-adjusted half of
+  // the offset and once for the low half.
+  SDValue HighAddr = DAG.getTargetGlobalAddress(Global, Loc, PointerVT, 0,
+                                                PPCII::MO_TPREL16_HA);
+  SDValue LowAddr = DAG.getTargetGlobalAddress(Global, Loc, PointerVT, 0,
+                                               PPCII::MO_TPREL16_LO);
+
+  // Pick the TLS register and its value type for the current subtarget.
+  SDValue TLSBase;
+  if (PPCSubTarget.isPPC64())
+    TLSBase = DAG.getRegister(PPC::X13, MVT::i64);
+  else
+    TLSBase = DAG.getRegister(PPC::R2, MVT::i32);
+
+  SDValue HighPart = DAG.getNode(PPCISD::Hi, Loc, PointerVT, HighAddr, TLSBase);
+  return DAG.getNode(PPCISD::Lo, Loc, PointerVT, LowAddr, HighPart);
+}
+
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
@@ -1440,13 +1479,16 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
Entry.Node = Nest; Args.push_back(Entry);
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
- std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C,
+ TargetLowering::CallLoweringInfo CLI(Chain,
+ Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0,
+ CallingConv::C,
/*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ /*doesNotRet=*/false,
+ /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__trampoline_setup", PtrVT),
Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
}
@@ -1702,7 +1744,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
@@ -1721,19 +1763,19 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
default:
llvm_unreachable("ValVT not supported by formal arguments Lowering");
case MVT::i32:
- RC = PPC::GPRCRegisterClass;
+ RC = &PPC::GPRCRegClass;
break;
case MVT::f32:
- RC = PPC::F4RCRegisterClass;
+ RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
- RC = PPC::F8RCRegisterClass;
+ RC = &PPC::F8RCRegClass;
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v4f32:
- RC = PPC::VRRCRegisterClass;
+ RC = &PPC::VRRCRegClass;
break;
}
@@ -1763,7 +1805,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// caller's stack frame, right above the parameter list area.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ByValArgLocs, *DAG.getContext());
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -2743,7 +2785,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
// Copy all of the result registers out of their specified physreg.
@@ -2800,7 +2842,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
for (unsigned i = 0; i != RVLocs.size(); ++i)
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
@@ -2864,14 +2906,19 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
}
SDValue
-PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
if (isTailCall)
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
@@ -2921,7 +2968,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// Assign locations to all of the outgoing arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
@@ -2961,7 +3008,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// Assign locations to all of the outgoing aggregate by value arguments.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ByValArgLocs, *DAG.getContext());
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -3485,7 +3532,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
// If this is the first return lowered for this function, add the regs to the
@@ -4559,7 +4606,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC");
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
@@ -4899,11 +4946,37 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
- if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
- MI->getOpcode() == PPC::SELECT_CC_I8 ||
- MI->getOpcode() == PPC::SELECT_CC_F4 ||
- MI->getOpcode() == PPC::SELECT_CC_F8 ||
- MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+ if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8)) {
+ unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ?
+ PPC::ISEL8 : PPC::ISEL;
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // The SelectPred is ((BI << 5) | BO) for a BCC
+ unsigned BO = SelectPred & 0xF;
+ assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel");
+
+ unsigned TrueOpNo, FalseOpNo;
+ if (BO == 12) {
+ TrueOpNo = 2;
+ FalseOpNo = 3;
+ } else {
+ TrueOpNo = 3;
+ FalseOpNo = 2;
+ SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred);
+ }
+
+ BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(TrueOpNo).getReg())
+ .addReg(MI->getOperand(FalseOpNo).getReg())
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg());
+ } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
@@ -5612,18 +5685,18 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case 'b': // R1-R31
case 'r': // R0-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
- return std::make_pair(0U, PPC::G8RCRegisterClass);
- return std::make_pair(0U, PPC::GPRCRegisterClass);
+ return std::make_pair(0U, &PPC::G8RCRegClass);
+ return std::make_pair(0U, &PPC::GPRCRegClass);
case 'f':
if (VT == MVT::f32)
- return std::make_pair(0U, PPC::F4RCRegisterClass);
- else if (VT == MVT::f64)
- return std::make_pair(0U, PPC::F8RCRegisterClass);
+ return std::make_pair(0U, &PPC::F4RCRegClass);
+ if (VT == MVT::f64)
+ return std::make_pair(0U, &PPC::F8RCRegClass);
break;
case 'v':
- return std::make_pair(0U, PPC::VRRCRegisterClass);
+ return std::make_pair(0U, &PPC::VRRCRegClass);
case 'y': // crrc
- return std::make_pair(0U, PPC::CRRCRegisterClass);
+ return std::make_pair(0U, &PPC::CRRCRegClass);
}
}
@@ -5839,11 +5912,30 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
+/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+/// is expanded to mul + add.
+bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+ if (!VT.isSimple()) // non-simple (extended) value types always report false
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v4f32: // only f32, f64 and v4f32 report true
+ return true;
+ default: // every other simple type leaves the switch and hits the final return
+ break;
+ }
+
+ return false;
+}
+
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- unsigned Directive = PPCSubTarget.getDarwinDirective();
- if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2)
- return Sched::ILP;
+ if (DisableILPPref)
+ return TargetLowering::getSchedulingPreference(N);
- return TargetLowering::getSchedulingPreference(N);
+ return Sched::ILP;
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 18eb07200307..b0a013b4b4cf 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -366,6 +366,12 @@ namespace llvm {
bool IsZeroVal, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+ /// is expanded to mul + add.
+ virtual bool isFMAFasterThanMulAndAdd(EVT VT) const;
+
private:
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -389,6 +395,7 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
@@ -439,12 +446,7 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
- bool isVarArg, bool doesNotRet, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
virtual bool
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 7f67a4159dfe..39778a5dc1e1 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -68,15 +68,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_Darwin : IForm<18, 0, 1,
- (outs), (ins calltarget:$func, variable_ops),
+ (outs), (ins calltarget:$func),
"bl $func", BrB, []>; // See Pat patterns below.
def BLA8_Darwin : IForm<18, 1, 1,
- (outs), (ins aaddr:$func, variable_ops),
+ (outs), (ins aaddr:$func),
"bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>;
}
let Uses = [CTR8, RM] in {
def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins variable_ops),
+ (outs), (ins),
"bctrl", BrB,
[(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>;
}
@@ -88,27 +88,27 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_ELF : IForm<18, 0, 1,
- (outs), (ins calltarget:$func, variable_ops),
+ (outs), (ins calltarget:$func),
"bl $func", BrB, []>; // See Pat patterns below.
let isCodeGenOnly = 1 in
def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24,
- (outs), (ins calltarget:$func, variable_ops),
+ (outs), (ins calltarget:$func),
"bl $func\n\tnop", BrB, []>;
def BLA8_ELF : IForm<18, 1, 1,
- (outs), (ins aaddr:$func, variable_ops),
+ (outs), (ins aaddr:$func),
"bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
let isCodeGenOnly = 1 in
def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
- (outs), (ins aaddr:$func, variable_ops),
+ (outs), (ins aaddr:$func),
"bla $func\n\tnop", BrB,
[(PPCcall_nop_SVR4 (i64 imm:$func))]>;
}
let Uses = [X11, CTR8, RM] in {
def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins variable_ops),
+ (outs), (ins),
"bctrl", BrB,
[(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>;
}
@@ -180,17 +180,17 @@ def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi8 :Pseudo< (outs),
- (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ (ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd8 $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset),
"#TC_RETURNa8 $func $offset",
[(PPCtc_return (i64 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset, variable_ops),
+def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
@@ -229,6 +229,15 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let Defs = [CTR8], Uses = [CTR8] in {
+ def BDZ8 : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst", BrB, []>;
+ def BDNZ8 : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst", BrB, []>;
+ }
+}
+
// 64-but CR instructions
def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
@@ -256,6 +265,15 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+let Pattern = [(set G8RC:$rT, readcyclecounter)] in
+def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
+ "mfspr $rT, 268", SprMFTB>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+// Note that encoding mftb using mfspr is now the preferred form,
+// and has been since at least ISA v2.03. The mftb instruction has
+// now been phased out. Using mfspr, however, is known not to work on
+// the POWER3.
+
let Defs = [X1], Uses = [X1] in
def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"",
[(set G8RC:$result,
@@ -278,45 +296,37 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
let PPC970_Unit = 1 in { // FXU Operations.
-// Copies, extends, truncates.
-def OR4To8 : XForm_6<31, 444, (outs G8RC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "or $rA, $rS, $rB", IntGeneral,
- []>;
-def OR8To4 : XForm_6<31, 444, (outs GPRC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "or $rA, $rS, $rB", IntGeneral,
- []>;
-
def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
- "li $rD, $imm", IntGeneral,
+ "li $rD, $imm", IntSimple,
[(set G8RC:$rD, immSExt16:$imm)]>;
def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
- "lis $rD, $imm", IntGeneral,
+ "lis $rD, $imm", IntSimple,
[(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
// Logical ops.
def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "nand $rA, $rS, $rB", IntGeneral,
+ "nand $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "and $rA, $rS, $rB", IntGeneral,
+ "and $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "andc $rA, $rS, $rB", IntGeneral,
+ "andc $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "or $rA, $rS, $rB", IntGeneral,
+ "or $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "nor $rA, $rS, $rB", IntGeneral,
+ "nor $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "orc $rA, $rS, $rB", IntGeneral,
+ "orc $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "eqv $rA, $rS, $rB", IntGeneral,
+ "eqv $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "xor $rA, $rS, $rB", IntGeneral,
+ "xor $rA, $rS, $rB", IntSimple,
[(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
// Logical ops with immediate.
@@ -329,20 +339,20 @@ def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
[(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
isDOT;
def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
- "ori $dst, $src1, $src2", IntGeneral,
+ "ori $dst, $src1, $src2", IntSimple,
[(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
- "oris $dst, $src1, $src2", IntGeneral,
+ "oris $dst, $src1, $src2", IntSimple,
[(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
- "xori $dst, $src1, $src2", IntGeneral,
+ "xori $dst, $src1, $src2", IntSimple,
[(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
- "xoris $dst, $src1, $src2", IntGeneral,
+ "xoris $dst, $src1, $src2", IntSimple,
[(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "add $rT, $rA, $rB", IntGeneral,
+ "add $rT, $rA, $rB", IntSimple,
[(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
let Defs = [CARRY] in {
@@ -355,10 +365,13 @@ def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
[(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
}
def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
- "addi $rD, $rA, $imm", IntGeneral,
+ "addi $rD, $rA, $imm", IntSimple,
+ [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
+def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm),
+ "addi $rD, $rA, $imm", IntSimple,
[(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
- "addis $rD, $rA, $imm", IntGeneral,
+ "addis $rD, $rA, $imm", IntSimple,
[(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
let Defs = [CARRY] in {
@@ -374,7 +387,7 @@ def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subf $rT, $rA, $rB", IntGeneral,
[(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
- "neg $rT, $rA", IntGeneral,
+ "neg $rT, $rA", IntSimple,
[(set G8RC:$rT, (ineg G8RC:$rA))]>;
let Uses = [CARRY], Defs = [CARRY] in {
def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
@@ -427,21 +440,21 @@ def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
}
def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
- "extsb $rA, $rS", IntGeneral,
+ "extsb $rA, $rS", IntSimple,
[(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
- "extsh $rA, $rS", IntGeneral,
+ "extsh $rA, $rS", IntSimple,
[(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
- "extsw $rA, $rS", IntGeneral,
+ "extsw $rA, $rS", IntSimple,
[(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS),
- "extsw $rA, $rS", IntGeneral,
+ "extsw $rA, $rS", IntSimple,
[(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
- "extsw $rA, $rS", IntGeneral,
+ "extsw $rA, $rS", IntSimple,
[(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
let Defs = [CARRY] in {
@@ -493,6 +506,10 @@ def RLWINM8 : MForm_2<21,
"rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
[]>;
+def ISEL8 : AForm_1<31, 15,
+ (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
+ "isel $rT, $rA, $rB, $cond", IntGeneral,
+ []>;
} // End FXU Operations.
@@ -529,6 +546,16 @@ def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp
NoEncode<"$ea_result">;
// NO LWAU!
+def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lhaux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result), // lwaux is X-form 31/373; 375 is lhaux (see LHAUX8)
+ (ins memrr:$addr),
+ "lwaux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">, isPPC64;
}
// Zero extending loads.
@@ -568,6 +595,22 @@ def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
+
+def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lbzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result), // lhzux is X-form 31/311 (lhzx 279, lhax 343, lhaux 375)
+ (ins memrr:$addr),
+ "lhzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lwzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
}
}
@@ -603,6 +646,11 @@ def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
+def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "ldux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">, isPPC64;
}
def : Pat<(PPCload ixaddr:$src),
@@ -660,6 +708,14 @@ def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stwu $rS, $ptroff($ptrreg)", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+
def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
s16immX4:$ptroff, ptr_rc:$ptrreg),
"stdu $rS, $ptroff($ptrreg)", LdStSTD,
@@ -668,10 +724,41 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
isPPC64;
-let mayStore = 1 in
-def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst),
- "stdux $rS, $dst", LdStSTD,
- []>, isPPC64;
+
+def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
+ (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stbux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 G8RC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
+ (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "sthux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 G8RC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
+ (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stwux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti32 G8RC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
+ (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stdux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked, isPPC64;
// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
@@ -706,11 +793,12 @@ def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
// Extensions and truncates to/from 32-bit regs.
def : Pat<(i64 (zext GPRC:$in)),
- (RLDICL (OR4To8 GPRC:$in, GPRC:$in), 0, 32)>;
+ (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32),
+ 0, 32)>;
def : Pat<(i64 (anyext GPRC:$in)),
- (OR4To8 GPRC:$in, GPRC:$in)>;
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32)>;
def : Pat<(i32 (trunc G8RC:$in)),
- (OR8To4 G8RC:$in, G8RC:$in)>;
+ (EXTRACT_SUBREG G8RC:$in, sub_32)>;
// Extending loads with i64 targets.
def : Pat<(zextloadi1 iaddr:$src),
@@ -765,6 +853,10 @@ def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, G8RC:$in),
+ (ADDIS8 G8RC:$in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, G8RC:$in),
+ (ADDI8L G8RC:$in, tglobaltlsaddr:$g)>;
def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
(ADDIS8 G8RC:$in, tglobaladdr:$g)>;
def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 6c0f3d3f06e5..b0b842328196 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -274,15 +274,11 @@ let PPC970_Unit = 5 in { // VALU Operations.
// VA-Form instructions. 3-input AltiVec ops.
def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vmaddfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
- VRRC:$vB))]>,
- Requires<[FPContractions]>;
+ [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>;
def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vnmsubfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fsub V_immneg0,
- (fsub (fmul VRRC:$vA, VRRC:$vC),
- VRRC:$vB)))]>,
- Requires<[FPContractions]>;
+ [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC,
+ (fneg VRRC:$vB))))]>;
def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index d8e4b2bdf34a..a41a0279d215 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -94,6 +94,12 @@ class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
let Inst{31} = lk;
}
+class IForm_ext<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : IForm<opcode, aa, lk, OOL, IOL, asmstr, itin, pattern> { // IForm variant that additionally takes a 5-bit bo value
+ let LI{0-4} = bo; // bo is written into bits 0-4 of LI (LI is presumably declared by IForm; not visible here)
+}
+
// 1.7.2 B-Form
class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
: I<opcode, OOL, IOL, asmstr, BrB> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index b45ada9db32a..47f09dca77d3 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -40,6 +40,10 @@ extern cl::opt<bool> DisablePPC64RS;
using namespace llvm;
+static cl::
+opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
+ cl::desc("Disable analysis for CTR loops"));
+
PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
@@ -75,6 +79,22 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
return new PPCScoreboardHazardRecognizer(II, DAG);
}
+
+// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
+bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: return false; // any other opcode: SrcReg, DstReg and SubIdx are left unmodified
+ case PPC::EXTSW:
+ case PPC::EXTSW_32_64: // both sign-extending opcodes are handled identically
+ SrcReg = MI.getOperand(1).getReg(); // operand 1 is reported as the source register
+ DstReg = MI.getOperand(0).getReg(); // operand 0 is reported as the destination register
+ SubIdx = PPC::sub_32; // sub-register index reported to the caller
+ return true;
+ }
+}
+
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
@@ -186,10 +206,14 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
// Branch analysis.
+// Note: If the condition register is set to CTR or CTR8 then this is a
+// BDNZ (imm == 1) or BDZ (imm == 0) branch.
bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
@@ -221,7 +245,30 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
Cond.push_back(LastInst->getOperand(0));
Cond.push_back(LastInst->getOperand(1));
return false;
+ } else if (LastInst->getOpcode() == PPC::BDNZ8 ||
+ LastInst->getOpcode() == PPC::BDNZ) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(1));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BDZ8 ||
+ LastInst->getOpcode() == PPC::BDZ) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ return false;
}
+
// Otherwise, don't know what this is.
return true;
}
@@ -245,6 +292,34 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
Cond.push_back(SecondLastInst->getOperand(1));
FBB = LastInst->getOperand(0).getMBB();
return false;
+ } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 ||
+ SecondLastInst->getOpcode() == PPC::BDNZ) &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(1));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if ((SecondLastInst->getOpcode() == PPC::BDZ8 ||
+ SecondLastInst->getOpcode() == PPC::BDZ) &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
}
// If the block ends with two PPC:Bs, handle it. The second one is not
@@ -273,7 +348,9 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 0;
--I;
}
- if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC)
+ if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
+ I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
return 0;
// Remove the branch.
@@ -283,7 +360,9 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
if (I == MBB.begin()) return 1;
--I;
- if (I->getOpcode() != PPC::BCC)
+ if (I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
+ I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
return 1;
// Remove the branch.
@@ -301,10 +380,16 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
assert((Cond.size() == 2 || Cond.size() == 0) &&
"PPC branch conditions have two components!");
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+
// One-way branch.
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
+ else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ BuildMI(&MBB, DL, get(Cond[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
else // Conditional branch
BuildMI(&MBB, DL, get(PPC::BCC))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
@@ -312,8 +397,13 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
}
// Two-way Conditional Branch.
- BuildMI(&MBB, DL, get(PPC::BCC))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ BuildMI(&MBB, DL, get(Cond[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
return 2;
}
@@ -354,7 +444,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const{
DebugLoc DL;
- if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) {
+ if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
if (SrcReg != PPC::LR) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
.addReg(SrcReg,
@@ -370,7 +460,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
}
- } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
if (SrcReg != PPC::LR8) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
.addReg(SrcReg,
@@ -386,17 +476,17 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
}
- } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
- } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
- } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
(!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
@@ -438,7 +528,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
}
- } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
// FIXME: We use CRi here because there is no mtcrf on a bit. Since the
// backend currently only uses CR1EQ as an individual bit, this should
// not cause any bug. If we need other uses of CR bits, the following
@@ -470,9 +560,9 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
Reg = PPC::CR7;
return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
- PPC::CRRCRegisterClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs);
- } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
// We don't have indexed addressing for vector loads. Emit:
// R0 = ADDI FI#
// STVX VAL, 0, R0
@@ -522,7 +612,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs)const{
- if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) {
+ if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
if (DestReg != PPC::LR) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
DestReg), FrameIdx));
@@ -531,7 +621,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
PPC::R11), FrameIdx));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
}
- } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
if (DestReg != PPC::LR8) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
FrameIdx));
@@ -540,13 +630,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
PPC::X11), FrameIdx));
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
}
- } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
FrameIdx));
- } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
- } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
(!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
@@ -578,7 +668,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
PPC::MTCRF8 : PPC::MTCRF), DestReg)
.addReg(ScratchReg));
}
- } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
unsigned Reg = 0;
if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
@@ -607,9 +697,9 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
Reg = PPC::CR7;
return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
- PPC::CRRCRegisterClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs);
- } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) {
+ } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
// We don't have indexed addressing for vector loads. Emit:
// R0 = ADDI FI#
// Dest = LVX 0, R0
@@ -665,8 +755,11 @@ PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
bool PPCInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
- // Leave the CR# the same, but invert the condition.
- Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
+ if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
+ Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
+ else
+ // Leave the CR# the same, but invert the condition.
+ Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
return false;
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 7d49aa129e36..374213ea435b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -92,6 +92,9 @@ public:
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const;
+ bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 748486c1ca26..f57f0c975ad6 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -323,7 +323,7 @@ def memri : Operand<iPTR> {
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc, ptr_rc);
+ let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg);
}
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
@@ -349,10 +349,10 @@ def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
@@ -438,6 +438,13 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
"b${cond:cc} ${cond:reg}, $dst"
/*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+
+ let Defs = [CTR], Uses = [CTR] in {
+ def BDZ : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst", BrB, []>;
+ def BDNZ : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst", BrB, []>;
+ }
}
// Darwin ABI Calls.
@@ -445,15 +452,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_Darwin : IForm<18, 0, 1,
- (outs), (ins calltarget:$func, variable_ops),
+ (outs), (ins calltarget:$func),
"bl $func", BrB, []>; // See Pat patterns below.
def BLA_Darwin : IForm<18, 1, 1,
- (outs), (ins aaddr:$func, variable_ops),
+ (outs), (ins aaddr:$func),
"bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>;
}
let Uses = [CTR, RM] in {
def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins variable_ops),
+ (outs), (ins),
"bctrl", BrB,
[(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>;
}
@@ -464,16 +471,16 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_SVR4 : IForm<18, 0, 1,
- (outs), (ins calltarget:$func, variable_ops),
+ (outs), (ins calltarget:$func),
"bl $func", BrB, []>; // See Pat patterns below.
def BLA_SVR4 : IForm<18, 1, 1,
- (outs), (ins aaddr:$func, variable_ops),
+ (outs), (ins aaddr:$func),
"bla $func", BrB,
[(PPCcall_SVR4 (i32 imm:$func))]>;
}
let Uses = [CTR, RM] in {
def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins variable_ops),
+ (outs), (ins),
"bctrl", BrB,
[(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>;
}
@@ -482,18 +489,18 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi :Pseudo< (outs),
- (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ (ins calltarget:$dst, i32imm:$offset),
"#TC_RETURNd $dst $offset",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset),
"#TC_RETURNa $func $offset",
[(PPCtc_return (i32 imm:$func), imm:$offset)]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset, variable_ops),
+def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
"#TC_RETURNr $dst $offset",
[]>;
@@ -704,6 +711,44 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
"lfd $rD, $addr", LdStLFD,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
+
+
+// Indexed (r+r) Loads with Update (preinc).
+def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lbzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lhaux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lhzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lwzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lfsux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lfdux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
}
}
@@ -815,12 +860,49 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
[(store GPRC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-
-let mayStore = 1 in {
-def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB),
- "stwux $rS, $rA, $rB", LdStStore,
- []>;
-}
+
+def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
+ (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stbux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 GPRC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
+ (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "sthux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 GPRC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
+ (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stwux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
+ (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stfsux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
+ (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stfdux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStStore,
[(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
@@ -852,7 +934,10 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins),
let PPC970_Unit = 1 in { // FXU Operations.
def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
- "addi $rD, $rA, $imm", IntGeneral,
+ "addi $rD, $rA, $imm", IntSimple,
+ [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm),
+ "addi $rD, $rA, $imm", IntSimple,
[(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
let Defs = [CARRY] in {
def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
@@ -864,7 +949,7 @@ def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
[]>;
}
def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
- "addis $rD, $rA, $imm", IntGeneral,
+ "addis $rD, $rA, $imm", IntSimple,
[(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
"la $rD, $sym($rA)", IntGeneral,
@@ -881,10 +966,10 @@ def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
let isReMaterializable = 1 in {
def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
- "li $rD, $imm", IntGeneral,
+ "li $rD, $imm", IntSimple,
[(set GPRC:$rD, immSExt16:$imm)]>;
def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
- "lis $rD, $imm", IntGeneral,
+ "lis $rD, $imm", IntSimple,
[(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
}
}
@@ -899,18 +984,18 @@ def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
[(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
isDOT;
def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
- "ori $dst, $src1, $src2", IntGeneral,
+ "ori $dst, $src1, $src2", IntSimple,
[(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
- "oris $dst, $src1, $src2", IntGeneral,
+ "oris $dst, $src1, $src2", IntSimple,
[(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
- "xori $dst, $src1, $src2", IntGeneral,
+ "xori $dst, $src1, $src2", IntSimple,
[(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
- "xoris $dst, $src1, $src2", IntGeneral,
+ "xoris $dst, $src1, $src2", IntSimple,
[(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
-def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntGeneral,
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
[]>;
def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
"cmpwi $crD, $rA, $imm", IntCompare>;
@@ -921,28 +1006,28 @@ def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
let PPC970_Unit = 1 in { // FXU Operations.
def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "nand $rA, $rS, $rB", IntGeneral,
+ "nand $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "and $rA, $rS, $rB", IntGeneral,
+ "and $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "andc $rA, $rS, $rB", IntGeneral,
+ "andc $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "or $rA, $rS, $rB", IntGeneral,
+ "or $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "nor $rA, $rS, $rB", IntGeneral,
+ "nor $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "orc $rA, $rS, $rB", IntGeneral,
+ "orc $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "eqv $rA, $rS, $rB", IntGeneral,
+ "eqv $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "xor $rA, $rS, $rB", IntGeneral,
+ "xor $rA, $rS, $rB", IntSimple,
[(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"slw $rA, $rS, $rB", IntGeneral,
@@ -967,10 +1052,10 @@ def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
"cntlzw $rA, $rS", IntGeneral,
[(set GPRC:$rA, (ctlz GPRC:$rS))]>;
def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
- "extsb $rA, $rS", IntGeneral,
+ "extsb $rA, $rS", IntSimple,
[(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
- "extsh $rA, $rS", IntGeneral,
+ "extsh $rA, $rS", IntSimple,
[(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
@@ -1115,7 +1200,7 @@ def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
PPC970_MicroCode, PPC970_Unit_CRU;
def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
- "mfcr $rT, $FXM", SprMFCR>,
+ "mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
// Instructions to manipulate FPSCR. Only long double handling uses these.
@@ -1159,7 +1244,7 @@ let PPC970_Unit = 1 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
//
def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "add $rT, $rA, $rB", IntGeneral,
+ "add $rT, $rA, $rB", IntSimple,
[(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
let Defs = [CARRY] in {
def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
@@ -1194,7 +1279,7 @@ def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
PPC970_DGroup_Cracked;
}
def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
- "neg $rT, $rA", IntGeneral,
+ "neg $rT, $rA", IntSimple,
[(set GPRC:$rT, (ineg GPRC:$rA))]>;
let Uses = [CARRY], Defs = [CARRY] in {
def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
@@ -1226,51 +1311,43 @@ let Uses = [RM] in {
def FMADD : AForm_1<63, 29,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB))]>,
- Requires<[FPContractions]>;
+ [(set F8RC:$FRT,
+ (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>;
def FMADDS : AForm_1<59, 29,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB))]>,
- Requires<[FPContractions]>;
+ [(set F4RC:$FRT,
+ (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>;
def FMSUB : AForm_1<63, 28,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB))]>,
- Requires<[FPContractions]>;
+ [(set F8RC:$FRT,
+ (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>;
def FMSUBS : AForm_1<59, 28,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB))]>,
- Requires<[FPContractions]>;
+ [(set F4RC:$FRT,
+ (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>;
def FNMADD : AForm_1<63, 31,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB)))]>,
- Requires<[FPContractions]>;
+ [(set F8RC:$FRT,
+ (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>;
def FNMADDS : AForm_1<59, 31,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB)))]>,
- Requires<[FPContractions]>;
+ [(set F4RC:$FRT,
+ (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>;
def FNMSUB : AForm_1<63, 30,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB)))]>,
- Requires<[FPContractions]>;
+ [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC,
+ (fneg F8RC:$FRB))))]>;
def FNMSUBS : AForm_1<59, 30,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB)))]>,
- Requires<[FPContractions]>;
+ [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC,
+ (fneg F4RC:$FRB))))]>;
}
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
// having 4 of these, force the comparison to always be an 8-byte double (code
@@ -1321,6 +1398,13 @@ let Uses = [RM] in {
}
let PPC970_Unit = 1 in { // FXU Operations.
+ def ISEL : AForm_1<31, 15,
+ (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
+ "isel $rT, $rA, $rB, $cond", IntGeneral,
+ []>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
// M-Form instructions. rotate and mask instructions.
//
let isCommutable = 1 in {
@@ -1418,6 +1502,10 @@ def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, GPRC:$in),
+ (ADDIS GPRC:$in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, GPRC:$in),
+ (ADDIL GPRC:$in, tglobaltlsaddr:$g)>;
def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
(ADDIS GPRC:$in, tglobaladdr:$g)>;
def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
@@ -1427,14 +1515,6 @@ def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS GPRC:$in, tblockaddress:$g)>;
-// Fused negative multiply subtract, alternate pattern
-def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
- (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
- Requires<[FPContractions]>;
-def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
- (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
- Requires<[FPContractions]>;
-
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
// amounts.
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index a6528c0d7030..aba27399d6da 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -210,7 +210,7 @@ asm(
".text\n"
".align 2\n"
".globl PPC64CompilationCallback\n"
- ".section \".opd\",\"aw\"\n"
+ ".section \".opd\",\"aw\",@progbits\n"
".align 3\n"
"PPC64CompilationCallback:\n"
".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 276edcb69d19..19ec993ba00f 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -99,10 +99,22 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
MCContext &Ctx = Printer.OutContext;
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
- if (MO.getTargetFlags() & PPCII::MO_LO16)
- RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_LO16 : MCSymbolRefExpr::VK_PPC_GAS_LO16;
- else if (MO.getTargetFlags() & PPCII::MO_HA16)
- RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_HA16 : MCSymbolRefExpr::VK_PPC_GAS_HA16;
+ unsigned access = MO.getTargetFlags() & PPCII::MO_ACCESS_MASK;
+
+ switch (access) {
+ case PPCII::MO_HA16: RefKind = isDarwin ?
+ MCSymbolRefExpr::VK_PPC_DARWIN_HA16 :
+ MCSymbolRefExpr::VK_PPC_GAS_HA16;
+ break;
+ case PPCII::MO_LO16: RefKind = isDarwin ?
+ MCSymbolRefExpr::VK_PPC_DARWIN_LO16 :
+ MCSymbolRefExpr::VK_PPC_GAS_LO16;
+ break;
+ case PPCII::MO_TPREL16_HA: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_HA;
+ break;
+ case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
+ break;
+ }
// FIXME: This isn't right, but we don't have a good way to express this in
// the MC Level, see below.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index ef1357137def..ab8bf1f93a37 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -89,10 +89,17 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
}
+bool
+PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return requiresRegisterScavenging(MF);
+}
+
+
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
-PPCRegisterInfo::getPointerRegClass(unsigned Kind) const {
+PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
if (Subtarget.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
@@ -192,6 +199,20 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
+bool
+PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
+ switch (RC->getID()) {
+ case PPC::G8RCRegClassID:
+ case PPC::GPRCRegClassID:
+ case PPC::F8RCRegClassID:
+ case PPC::F4RCRegClassID:
+ case PPC::VRRCRegClassID:
+ return true;
+ default:
+ return false;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
@@ -321,14 +342,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// address of new allocated space.
if (LP64) {
if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(Reg, RegState::Kill)
- .addReg(PPC::X1, RegState::Define)
+ .addReg(PPC::X1)
.addReg(MI.getOperand(1).getReg());
else
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X1, RegState::Define)
+ .addReg(PPC::X1)
.addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
@@ -342,9 +363,9 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
.addImm(maxCallFrameSize)
.addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
} else {
- BuildMI(MBB, II, dl, TII.get(PPC::STWUX))
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1)
.addReg(Reg, RegState::Kill)
- .addReg(PPC::R1, RegState::Define)
+ .addReg(PPC::R1)
.addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index b1e6a7218ee7..152c36d699ec 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -35,7 +35,8 @@ public:
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
- virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const;
+ virtual const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
@@ -46,10 +47,14 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const;
+ virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+
/// requiresRegisterScavenging - We require a register scavenger.
/// FIXME (64-bit): Should be inlined.
bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 0e55313b135f..5ca387629b6c 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -314,12 +314,18 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32,
}
def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
- CR7, CR2, CR3, CR4)> {
- let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)];
+ CR7, CR2, CR3, CR4)>;
+
+// The CTR registers are not allocatable because they're used by the
+// decrement-and-branch instructions, and thus need to stay live across
+// multiple basic blocks.
+def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)> {
+ let isAllocatable = 0;
+}
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> {
+ let isAllocatable = 0;
}
-def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)>;
-def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)>;
def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
let CopyCost = -1;
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 8c0a8589052a..6a6ccb9d9852 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -25,6 +25,7 @@ def VFPU : FuncUnit; // vector floating point unit
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for PowerPC
//
+def IntSimple : InstrItinClass;
def IntGeneral : InstrItinClass;
def IntCompare : InstrItinClass;
def IntDivD : InstrItinClass;
@@ -117,17 +118,17 @@ include "PPCScheduleA2.td"
//
// opcode itinerary class
// ====== ===============
-// add IntGeneral
+// add IntSimple
// addc IntGeneral
// adde IntGeneral
-// addi IntGeneral
+// addi IntSimple
// addic IntGeneral
// addic. IntGeneral
-// addis IntGeneral
+// addis IntSimple
// addme IntGeneral
// addze IntGeneral
-// and IntGeneral
-// andc IntGeneral
+// and IntSimple
+// andc IntSimple
// andi. IntGeneral
// andis. IntGeneral
// b BrB
@@ -165,10 +166,10 @@ include "PPCScheduleA2.td"
// eciwx LdStLoad
// ecowx LdStLoad
// eieio LdStLoad
-// eqv IntGeneral
-// extsb IntGeneral
-// extsh IntGeneral
-// extsw IntRotateD
+// eqv IntSimple
+// extsb IntSimple
+// extsh IntSimple
+// extsw IntSimple
// fabs FPGeneral
// fadd FPGeneral
// fadds FPGeneral
@@ -280,13 +281,13 @@ include "PPCScheduleA2.td"
// mulld IntMulHD
// mulli IntMulLI
// mullw IntMulHW
-// nand IntGeneral
-// neg IntGeneral
-// nor IntGeneral
-// or IntGeneral
-// orc IntGeneral
-// ori IntGeneral
-// oris IntGeneral
+// nand IntSimple
+// neg IntSimple
+// nor IntSimple
+// or IntSimple
+// orc IntSimple
+// ori IntSimple
+// oris IntSimple
// rfi SprRFI
// rfid IntRFID
// rldcl IntRotateD
@@ -502,7 +503,7 @@ include "PPCScheduleA2.td"
// vupklsb VecPerm
// vupklsh VecPerm
// vxor VecGeneral
-// xor IntGeneral
-// xori IntGeneral
-// xoris IntGeneral
+// xor IntSimple
+// xori IntSimple
+// xoris IntSimple
//
diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td
index 419faea30220..cd0fb70a24bd 100644
--- a/lib/Target/PowerPC/PPCSchedule440.td
+++ b/lib/Target/PowerPC/PPCSchedule440.td
@@ -108,6 +108,15 @@ def PPC440Itineraries : ProcessorItineraries<
IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB,
FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold],
[GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntSimple , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
@@ -373,26 +382,6 @@ def PPC440Itineraries : ProcessorItineraries<
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1]>,
- InstrStage<1, [IRACC], 0>,
- InstrStage<4, [LWARX_Hold], 0>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1]>,
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index 857ba40ff622..4d4a5d0e1b2f 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -60,6 +60,17 @@ def PPCA2Itineraries : ProcessorItineraries<
IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6,
FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6],
[CR_Bypass, GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntSimple , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntGeneral , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -159,6 +170,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[10, 7, 7],
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotateD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntShift , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -181,6 +203,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[10, 7, 7],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<BrB , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -269,6 +302,17 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[14, 7],
[GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStStore , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
@@ -379,28 +423,6 @@ def PPCA2Itineraries : ProcessorItineraries<
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
[26, 7],
[NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTDCX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [26, 7],
- [NoBypass, GPR_Bypass]>,
InstrItinData<LdStSTWCX , [InstrStage<4,
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
index bc926f7bb2b6..61e89ed32c20 100644
--- a/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -14,6 +14,7 @@
def G3Itineraries : ProcessorItineraries<
[IU1, IU2, FPU1, BPU, SRU, SLU], [], [
+ InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
index f7ec1e01333e..e19ddfa80ea3 100644
--- a/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -13,6 +13,7 @@
def G4Itineraries : ProcessorItineraries<
[IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [
+ InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 37ebfc59880b..e7446cb028a3 100644
--- a/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -16,6 +16,7 @@ def IU4 : FuncUnit; // integer unit 4 (7450 simple)
def G4PlusItineraries : ProcessorItineraries<
[IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [
+ InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index d1e40cef9639..137149972680 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -13,6 +13,7 @@
def G5Itineraries : ProcessorItineraries<
[IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [
+ InstrItinData<IntSimple , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index f405b4711a52..bb193ac3d9ef 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -16,6 +16,7 @@
#include "PPC.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include <cstdlib>
@@ -25,56 +26,19 @@
using namespace llvm;
-#if defined(__APPLE__)
-#include <mach/mach.h>
-#include <mach/mach_host.h>
-#include <mach/host_info.h>
-#include <mach/machine.h>
-
-/// GetCurrentPowerPCFeatures - Returns the current CPUs features.
-static const char *GetCurrentPowerPCCPU() {
- host_basic_info_data_t hostInfo;
- mach_msg_type_number_t infoCount;
-
- infoCount = HOST_BASIC_INFO_COUNT;
- host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
- &infoCount);
-
- if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic";
-
- switch(hostInfo.cpu_subtype) {
- case CPU_SUBTYPE_POWERPC_601: return "601";
- case CPU_SUBTYPE_POWERPC_602: return "602";
- case CPU_SUBTYPE_POWERPC_603: return "603";
- case CPU_SUBTYPE_POWERPC_603e: return "603e";
- case CPU_SUBTYPE_POWERPC_603ev: return "603ev";
- case CPU_SUBTYPE_POWERPC_604: return "604";
- case CPU_SUBTYPE_POWERPC_604e: return "604e";
- case CPU_SUBTYPE_POWERPC_620: return "620";
- case CPU_SUBTYPE_POWERPC_750: return "750";
- case CPU_SUBTYPE_POWERPC_7400: return "7400";
- case CPU_SUBTYPE_POWERPC_7450: return "7450";
- case CPU_SUBTYPE_POWERPC_970: return "970";
- default: ;
- }
-
- return "generic";
-}
-#endif
-
-
PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit)
: PPCGenSubtargetInfo(TT, CPU, FS)
, StackAlignment(16)
, DarwinDirective(PPC::DIR_NONE)
- , IsGigaProcessor(false)
+ , HasMFOCRF(false)
, Has64BitSupport(false)
, Use64BitRegs(false)
, IsPPC64(is64Bit)
, HasAltivec(false)
, HasFSQRT(false)
, HasSTFIWX(false)
+ , HasISEL(false)
, IsBookE(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)
@@ -84,9 +48,10 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
-#if defined(__APPLE__)
+#if (defined(__APPLE__) || defined(__linux__)) && \
+ (defined(__ppc__) || defined(__powerpc__))
if (CPUName == "generic")
- CPUName = GetCurrentPowerPCCPU();
+ CPUName = sys::getHostCPUName();
#endif
// Parse features string.
@@ -146,10 +111,14 @@ bool PPCSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- if (DarwinDirective == PPC::DIR_440 || DarwinDirective == PPC::DIR_A2)
- Mode = TargetSubtargetInfo::ANTIDEP_ALL;
- else
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ // FIXME: It would be best to use TargetSubtargetInfo::ANTIDEP_ALL here,
+ // but we can't because we can't reassign the cr registers. There is a
+ // dependence between the cr register and the RLWINM instruction used
+ // to extract its value which the anti-dependency breaker can't currently
+ // see. Maybe we should make a late-expanded pseudo to encode this dependency.
+ // (the relevant code is in PPCDAGToDAGISel::SelectSETCC)
+
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
CriticalPathRCs.clear();
@@ -157,6 +126,9 @@ bool PPCSubtarget::enablePostRAScheduler(
CriticalPathRCs.push_back(&PPC::G8RCRegClass);
else
CriticalPathRCs.push_back(&PPC::GPRCRegClass);
+
+ CriticalPathRCs.push_back(&PPC::F8RCRegClass);
+ CriticalPathRCs.push_back(&PPC::VRRCRegClass);
return OptLevel >= CodeGenOpt::Default;
}
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index a275029d3e5d..0207c833938b 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -41,6 +41,8 @@ namespace PPC {
DIR_750,
DIR_970,
DIR_A2,
+ DIR_PWR6,
+ DIR_PWR7,
DIR_64
};
}
@@ -61,13 +63,14 @@ protected:
unsigned DarwinDirective;
/// Used by the ISel to turn on optimizations for POWER4-derived architectures
- bool IsGigaProcessor;
+ bool HasMFOCRF;
bool Has64BitSupport;
bool Use64BitRegs;
bool IsPPC64;
bool HasAltivec;
bool HasFSQRT;
bool HasSTFIWX;
+ bool HasISEL;
bool IsBookE;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
@@ -138,7 +141,8 @@ public:
bool hasFSQRT() const { return HasFSQRT; }
bool hasSTFIWX() const { return HasSTFIWX; }
bool hasAltivec() const { return HasAltivec; }
- bool isGigaProcessor() const { return IsGigaProcessor; }
+ bool hasMFOCRF() const { return HasMFOCRF; }
+ bool hasISEL() const { return HasISEL; }
bool isBookE() const { return IsBookE; }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 50f3db8b27fd..980511268a31 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -17,10 +17,15 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
+static cl::
+opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
+ cl::desc("Disable CTR loops for PPC"));
+
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
@@ -81,41 +86,37 @@ public:
return getTM<PPCTargetMachine>();
}
+ virtual bool addPreRegAlloc();
virtual bool addInstSelector();
virtual bool addPreEmitPass();
};
} // namespace
TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
- TargetPassConfig *PassConfig = new PPCPassConfig(this, PM);
+ return new PPCPassConfig(this, PM);
+}
- // Override this for PowerPC. Tail merging happily breaks up instruction issue
- // groups, which typically degrades performance.
- PassConfig->setEnableTailMerge(false);
+bool PPCPassConfig::addPreRegAlloc() {
+ if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCCTRLoops());
- return PassConfig;
+ return false;
}
bool PPCPassConfig::addInstSelector() {
// Install an instruction selector.
- PM->add(createPPCISelDag(getPPCTargetMachine()));
+ addPass(createPPCISelDag(getPPCTargetMachine()));
return false;
}
bool PPCPassConfig::addPreEmitPass() {
// Must run branch selection immediately preceding the asm printer.
- PM->add(createPPCBranchSelectionPass());
+ addPass(createPPCBranchSelectionPass());
return false;
}
bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE) {
- // FIXME: This should be moved to TargetJITInfo!!
- if (Subtarget.isPPC64())
- // Temporary workaround for the inability of PPC64 JIT to handle jump
- // tables.
- Options.DisableJumpTables = true;
-
// Inform the subtarget that we are in JIT mode. FIXME: does this break macho
// writing?
Subtarget.SetJITMode();
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 349cd890d5ee..b6763aa73802 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -2,7 +2,6 @@
TODO:
* gpr0 allocation
-* implement do-loop -> bdnz transform
* lmw/stmw pass a la arm load store optimizer for prolog/epilog
===-------------------------------------------------------------------------===
diff --git a/lib/Target/PowerPC/TargetInfo/Makefile b/lib/Target/PowerPC/TargetInfo/Makefile
index a101aa4a4495..2d0560d275f9 100644
--- a/lib/Target/PowerPC/TargetInfo/Makefile
+++ b/lib/Target/PowerPC/TargetInfo/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
LIBRARYNAME = LLVMPowerPCInfo
# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+override CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common