Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 8
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 38
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h | 2
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 5
-rw-r--r--  lib/Target/AArch64/AArch64InstructionSelector.cpp | 1
-rw-r--r--  lib/Target/AArch64/AArch64RegisterBankInfo.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64SchedFalkorDetails.td | 6
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 22
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h | 3
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 55
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 226
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 4
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.h | 6
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 25
-rw-r--r--  lib/Target/AMDGPU/DSInstructions.td | 2
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 1
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp | 29
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.h | 8
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARM.td | 43
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 166
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 23
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 6
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 16
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 3
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 50
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 128
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 66
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 2
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 4
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 54
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 4
-rw-r--r--  lib/Target/ARM/ARMInstructionSelector.cpp | 14
-rw-r--r--  lib/Target/ARM/ARMLegalizerInfo.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMRegisterBankInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 6
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 11
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 180
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 5
-rw-r--r--  lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h | 2
-rw-r--r--  lib/Target/Hexagon/BitTracker.cpp | 10
-rw-r--r--  lib/Target/Hexagon/BitTracker.h | 10
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 2
-rw-r--r--  lib/Target/Hexagon/HexagonOptAddrMode.cpp | 70
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Hexagon/RDFCopy.cpp | 2
-rw-r--r--  lib/Target/Hexagon/RDFGraph.h | 3
-rw-r--r--  lib/Target/Hexagon/RDFRegisters.cpp | 47
-rw-r--r--  lib/Target/Hexagon/RDFRegisters.h | 13
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsMSAInstrInfo.td | 74
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.cpp | 25
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h | 2
-rw-r--r--  lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp | 2
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp | 2
-rw-r--r--  lib/Target/WebAssembly/known_gcc_test_failures.txt | 28
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 8
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 9
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 5
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 63
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 2
-rw-r--r--  lib/Target/X86/X86InstructionSelector.cpp | 61
-rw-r--r--  lib/Target/X86/X86RegisterBankInfo.cpp | 1
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 5
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 1
79 files changed, 966 insertions, 756 deletions
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ae01ea477bb9..7141e77fcd25 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1865,7 +1865,7 @@ static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
getUsefulBits(Op, OpUsefulBits, Depth + 1);
// The interesting part was at zero in the argument
- OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);
+ OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
}
UsefulBits &= OpUsefulBits;
@@ -1894,13 +1894,13 @@ static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
Mask = Mask.shl(ShiftAmt);
getUsefulBits(Op, Mask, Depth + 1);
- Mask = Mask.lshr(ShiftAmt);
+ Mask.lshrInPlace(ShiftAmt);
} else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
// Shift Right
// We do not handle AArch64_AM::ASR, because the sign will change the
// number of useful bits
uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
- Mask = Mask.lshr(ShiftAmt);
+ Mask.lshrInPlace(ShiftAmt);
getUsefulBits(Op, Mask, Depth + 1);
Mask = Mask.shl(ShiftAmt);
} else
@@ -1954,7 +1954,7 @@ static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
if (Op.getOperand(1) == Orig) {
// Copy the bits from the result to the zero bits.
Mask = ResultUsefulBits & OpUsefulBits;
- Mask = Mask.lshr(LSB);
+ Mask.lshrInPlace(LSB);
}
if (Op.getOperand(0) == Orig)
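
As an illustrative aside (not part of the patch): the hunks above replace the copying APInt shift with its in-place variant. A minimal sketch of the same pattern, assuming LLVM's llvm/ADT/APInt.h:

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Mirrors the Mask handling above: logical shift right without materializing
// a temporary APInt.
static APInt shiftMaskRight(APInt Mask, unsigned ShiftAmt) {
  // Before: Mask = Mask.lshr(ShiftAmt);  // constructs and copies a new APInt
  Mask.lshrInPlace(ShiftAmt);             // after: mutates Mask directly
  return Mask;
}
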
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0d3289ac84c3..4ddc95199d4c 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3239,30 +3239,26 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- if (getTargetMachine().getCodeModel() == CodeModel::Large &&
- Subtarget->isTargetMachO()) {
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ auto GV = G->getGlobal();
+ if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
+ AArch64II::MO_GOT) {
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
+ Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
+ } else {
const GlobalValue *GV = G->getGlobal();
- bool InternalLinkage = GV->hasInternalLinkage();
- if (InternalLinkage)
- Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
- else {
- Callee =
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
- Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
- }
- } else if (ExternalSymbolSDNode *S =
- dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
+ }
+ } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ if (getTargetMachine().getCodeModel() == CodeModel::Large &&
+ Subtarget->isTargetMachO()) {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
+ } else {
+ const char *Sym = S->getSymbol();
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
- } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- const char *Sym = S->getSymbol();
- Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
// We don't usually want to end the call-sequence here because we would tidy
@@ -7130,7 +7126,7 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
if (I->getOpcode() != Instruction::FMul)
return true;
- if (I->getNumUses() != 1)
+ if (!I->hasOneUse())
return true;
Instruction *User = I->user_back();
@@ -10395,7 +10391,7 @@ bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
// call. This will cause the optimizers to attempt to move, or duplicate,
// return instructions to help enable tail call optimizations for this
// instruction.
-bool AArch64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 2ad6c8b23df8..a023b4373835 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -593,7 +593,7 @@ private:
}
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
- bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM, bool &IsInc,
SelectionDAG &DAG) const;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 4449412532f3..82e9c5a88e3b 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2586,6 +2586,11 @@ def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
Sched<[WriteF]>;
}
+// Similarly add aliases
+def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
+ Requires<[HasFullFP16]>;
+def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
+def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 878dac6bff1e..5e01b6cd2b46 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -20,6 +20,7 @@
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 20a5979f9b4b..6f9021c4a030 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -482,7 +482,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
auto &MO = MI.getOperand(Idx);
- if (!MO.isReg())
+ if (!MO.isReg() || !MO.getReg())
continue;
LLT Ty = MRI.getType(MO.getReg());
@@ -537,7 +537,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands};
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
- if (MI.getOperand(Idx).isReg()) {
+ if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
if (!Mapping->isValid())
return InstructionMapping();
diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index 6bce4ef6b652..4bd77d344488 100644
--- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -265,6 +265,12 @@ def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4
// Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(i|r|s)$")>;
def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>;
// SIMD Miscellaneous Instructions
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index b3aba4781db8..042755bd36d0 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -35,6 +35,11 @@ static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
"an address is ignored"), cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ UseNonLazyBind("aarch64-enable-nonlazybind",
+ cl::desc("Call nonlazybind functions via direct GOT load"),
+ cl::init(false), cl::Hidden);
+
AArch64Subtarget &
AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
StringRef CPUString) {
@@ -155,6 +160,23 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
return AArch64II::MO_NO_FLAG;
}
+unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
+ const GlobalValue *GV, const TargetMachine &TM) const {
+ // MachO large model always goes via a GOT, because we don't have the
+ // relocations available to do anything else..
+ if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
+ !GV->hasInternalLinkage())
+ return AArch64II::MO_GOT;
+
+ // NonLazyBind goes via GOT unless we know it's available locally.
+ auto *F = dyn_cast<Function>(GV);
+ if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
+ !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ return AArch64II::MO_GOT;
+
+ return AArch64II::MO_NO_FLAG;
+}
+
/// This function returns the name of a function which has an interface
/// like the non-standard bzero function, if such a function exists on
/// the current subtarget and it is considered prefereable over
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 40ad9185012c..3d66a9ea8ce6 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -271,6 +271,9 @@ public:
unsigned char ClassifyGlobalReference(const GlobalValue *GV,
const TargetMachine &TM) const;
+ unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
/// This function returns the name of a function which has an interface
/// like the non-standard bzero function, if such a function exists on
/// the current subtarget and it is considered prefereable over
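
As an illustrative aside (not part of the patch): the new classifyGlobalFunctionReference hook centralizes the GOT-versus-direct decision for call targets. A hedged sketch of a caller, mirroring the LowerCall hunk above; GV, DL, PtrVT, DAG, Subtarget and getTargetMachine() are assumed to be in scope as they are there:

if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
    AArch64II::MO_GOT) {
  // GOT-indirect: materialize the callee address through a LOADgot pseudo.
  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
  // Direct call: a plain TargetGlobalAddress is enough.
  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
}
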
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index cbab68979c56..d7bbc2bcd22c 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -2100,27 +2100,9 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
bool isNegative = parseOptionalToken(AsmToken::Minus);
const AsmToken &Tok = Parser.getTok();
- if (Tok.is(AsmToken::Real)) {
- APFloat RealVal(APFloat::IEEEdouble(), Tok.getString());
- if (isNegative)
- RealVal.changeSign();
-
- uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
- int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal));
- Parser.Lex(); // Eat the token.
- // Check for out of range values. As an exception, we let Zero through,
- // as we handle that special case in post-processing before matching in
- // order to use the zero register for it.
- if (Val == -1 && !RealVal.isPosZero()) {
- TokError("expected compatible register or floating-point constant");
- return MatchOperand_ParseFail;
- }
- Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext()));
- return MatchOperand_Success;
- }
- if (Tok.is(AsmToken::Integer)) {
+ if (Tok.is(AsmToken::Real) || Tok.is(AsmToken::Integer)) {
int64_t Val;
- if (!isNegative && Tok.getString().startswith("0x")) {
+ if (Tok.is(AsmToken::Integer) && !isNegative && Tok.getString().startswith("0x")) {
Val = Tok.getIntVal();
if (Val > 255 || Val < 0) {
TokError("encoded floating point value out of range");
@@ -2128,10 +2110,24 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
}
} else {
APFloat RealVal(APFloat::IEEEdouble(), Tok.getString());
+ if (isNegative)
+ RealVal.changeSign();
+
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
- // If we had a '-' in front, toggle the sign bit.
- IntVal ^= (uint64_t)isNegative << 63;
Val = AArch64_AM::getFP64Imm(APInt(64, IntVal));
+
+ // Check for out of range values. As an exception we let Zero through,
+ // but as tokens instead of an FPImm so that it can be matched by the
+ // appropriate alias if one exists.
+ if (RealVal.isPosZero()) {
+ Parser.Lex(); // Eat the token.
+ Operands.push_back(AArch64Operand::CreateToken("#0", false, S, getContext()));
+ Operands.push_back(AArch64Operand::CreateToken(".0", false, S, getContext()));
+ return MatchOperand_Success;
+ } else if (Val == -1) {
+ TokError("expected compatible register or floating-point constant");
+ return MatchOperand_ParseFail;
+ }
}
Parser.Lex(); // Eat the token.
Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext()));
@@ -3655,21 +3651,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
}
- // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR.
- if (NumOperands == 3 && Tok == "fmov") {
- AArch64Operand &RegOp = static_cast<AArch64Operand &>(*Operands[1]);
- AArch64Operand &ImmOp = static_cast<AArch64Operand &>(*Operands[2]);
- if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) {
- unsigned zreg =
- !AArch64MCRegisterClasses[AArch64::FPR64RegClassID].contains(
- RegOp.getReg())
- ? AArch64::WZR
- : AArch64::XZR;
- Operands[2] = AArch64Operand::CreateReg(zreg, false, Op.getStartLoc(),
- Op.getEndLoc(), getContext());
- }
- }
-
MCInst Inst;
// First try to match against the secondary set of tables containing the
// short-form NEON instructions (e.g. "fadd.2s v0, v1, v2").
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 8fc822329595..94112849f84e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -39,7 +39,7 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
PrivateLabelPrefix = "L";
SeparatorString = "%%";
CommentString = ";";
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
AlignmentIsInBytes = false;
UsesELFSectionDirectiveForBSS = true;
@@ -71,7 +71,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
// We prefer NEON instructions to be printed in the short form.
AssemblerDialect = AsmWriterVariant == Default ? 0 : AsmWriterVariant;
- PointerSize = 8;
+ CodePointerSize = 8;
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0446655830d1..a81bcb56dfdc 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -144,6 +144,10 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
}
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
+ const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>();
+ if (!MFI->isEntryFunction())
+ return;
+
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
amd_kernel_code_t KernelCode;
@@ -184,9 +188,11 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
// The starting address of all shader programs must be 256 bytes aligned.
- MF.setAlignment(8);
+ // Regular functions just need the basic required instruction alignment.
+ MF.setAlignment(MFI->isEntryFunction() ? 8 : 2);
SetupMachineFunction(MF);
@@ -220,13 +226,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->SwitchSection(CommentSection);
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- OutStreamer->emitRawComment(" Kernel info:", false);
- OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
- false);
+ if (MFI->isEntryFunction()) {
+ OutStreamer->emitRawComment(" Kernel info:", false);
+ } else {
+ OutStreamer->emitRawComment(" Function info:", false);
+ }
+
+ OutStreamer->emitRawComment(" codeLenInByte = " +
+ Twine(getFunctionCodeSize(MF)), false);
OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
false);
OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
false);
+
OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
false);
OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
@@ -236,6 +248,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
" bytes/workgroup (compile time only)", false);
+ if (!MFI->isEntryFunction())
+ return false;
+
OutStreamer->emitRawComment(" SGPRBlocks: " +
Twine(KernelInfo.SGPRBlocks), false);
OutStreamer->emitRawComment(" VGPRBlocks: " +
@@ -317,7 +332,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
const MachineOperand &MO = MI.getOperand(op_idx);
if (!MO.isReg())
continue;
- unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
+ unsigned HWReg = RI->getHWRegIndex(MO.getReg());
// Register with value > 127 aren't GPR
if (HWReg > 127)
@@ -360,18 +375,12 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
}
}
-void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
- const MachineFunction &MF) const {
+uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- uint64_t CodeSize = 0;
- unsigned MaxSGPR = 0;
- unsigned MaxVGPR = 0;
- bool VCCUsed = false;
- bool FlatUsed = false;
- const SIRegisterInfo *RI = STM.getRegisterInfo();
const SIInstrInfo *TII = STM.getInstrInfo();
+ uint64_t CodeSize = 0;
+
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
// TODO: CodeSize should account for multiple functions.
@@ -380,122 +389,86 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (MI.isDebugValue())
continue;
- if (isVerbose())
- CodeSize += TII->getInstSizeInBytes(MI);
+ CodeSize += TII->getInstSizeInBytes(MI);
+ }
+ }
- unsigned numOperands = MI.getNumOperands();
- for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
- const MachineOperand &MO = MI.getOperand(op_idx);
- unsigned width = 0;
- bool isSGPR = false;
+ return CodeSize;
+}
- if (!MO.isReg())
- continue;
+static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
+ const SIInstrInfo &TII,
+ unsigned Reg) {
+ for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
+ if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
+ return true;
+ }
- unsigned reg = MO.getReg();
- switch (reg) {
- case AMDGPU::EXEC:
- case AMDGPU::EXEC_LO:
- case AMDGPU::EXEC_HI:
- case AMDGPU::SCC:
- case AMDGPU::M0:
- case AMDGPU::SRC_SHARED_BASE:
- case AMDGPU::SRC_SHARED_LIMIT:
- case AMDGPU::SRC_PRIVATE_BASE:
- case AMDGPU::SRC_PRIVATE_LIMIT:
- continue;
+ return false;
+}
- case AMDGPU::VCC:
- case AMDGPU::VCC_LO:
- case AMDGPU::VCC_HI:
- VCCUsed = true;
- continue;
+void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
+ const MachineFunction &MF) const {
+ const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIInstrInfo *TII = STM.getInstrInfo();
+ const SIRegisterInfo *RI = &TII->getRegisterInfo();
- case AMDGPU::FLAT_SCR:
- case AMDGPU::FLAT_SCR_LO:
- case AMDGPU::FLAT_SCR_HI:
- // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
- // instructions aren't used to access the scratch buffer.
- if (MFI->hasFlatScratchInit())
- FlatUsed = true;
- continue;
- case AMDGPU::TBA:
- case AMDGPU::TBA_LO:
- case AMDGPU::TBA_HI:
- case AMDGPU::TMA:
- case AMDGPU::TMA_LO:
- case AMDGPU::TMA_HI:
- llvm_unreachable("trap handler registers should not be used");
-
- default:
- break;
- }
-
- if (AMDGPU::SReg_32RegClass.contains(reg)) {
- assert(!AMDGPU::TTMP_32RegClass.contains(reg) &&
- "trap handler registers should not be used");
- isSGPR = true;
- width = 1;
- } else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
- isSGPR = false;
- width = 1;
- } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
- assert(!AMDGPU::TTMP_64RegClass.contains(reg) &&
- "trap handler registers should not be used");
- isSGPR = true;
- width = 2;
- } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
- isSGPR = false;
- width = 2;
- } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
- isSGPR = false;
- width = 3;
- } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
- isSGPR = true;
- width = 4;
- } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
- isSGPR = false;
- width = 4;
- } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
- isSGPR = true;
- width = 8;
- } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
- isSGPR = false;
- width = 8;
- } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
- isSGPR = true;
- width = 16;
- } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
- isSGPR = false;
- width = 16;
- } else {
- llvm_unreachable("Unknown register class");
- }
- unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
- unsigned maxUsed = hwReg + width - 1;
- if (isSGPR) {
- MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
- } else {
- MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
- }
- }
+ MCPhysReg NumVGPRReg = AMDGPU::NoRegister;
+ for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ NumVGPRReg = Reg;
+ break;
+ }
+ }
+
+ MCPhysReg NumSGPRReg = AMDGPU::NoRegister;
+ for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ NumSGPRReg = Reg;
+ break;
}
}
+ // We found the maximum register index. They start at 0, so add one to get the
+ // number of registers.
+ ProgInfo.NumVGPR = NumVGPRReg == AMDGPU::NoRegister ? 0 :
+ RI->getHWRegIndex(NumVGPRReg) + 1;
+ ProgInfo.NumSGPR = NumSGPRReg == AMDGPU::NoRegister ? 0 :
+ RI->getHWRegIndex(NumSGPRReg) + 1;
unsigned ExtraSGPRs = 0;
- if (VCCUsed)
+ ProgInfo.VCCUsed = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
+ MRI.isPhysRegUsed(AMDGPU::VCC_HI);
+ if (ProgInfo.VCCUsed)
ExtraSGPRs = 2;
+ ProgInfo.FlatUsed = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
+ MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI);
+
+ // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
+ // instructions aren't used to access the scratch buffer. Inline assembly
+ // may need it though.
+ //
+ // If we only have implicit uses of flat_scr on flat instructions, it is not
+ // really needed.
+ if (ProgInfo.FlatUsed && !MFI->hasFlatScratchInit() &&
+ (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
+ !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
+ !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
+ ProgInfo.FlatUsed = false;
+ }
+
if (STM.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) {
- if (FlatUsed)
+ if (ProgInfo.FlatUsed)
ExtraSGPRs = 4;
} else {
if (STM.isXNACKEnabled())
ExtraSGPRs = 4;
- if (FlatUsed)
+ if (ProgInfo.FlatUsed)
ExtraSGPRs = 6;
}
@@ -505,34 +478,29 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
!STM.hasSGPRInitBug()) {
unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
- if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {
+ if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
// This can happen due to a compiler bug or when using inline asm.
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
"addressable scalar registers",
- MaxSGPR + 1, DS_Error,
+ ProgInfo.NumSGPR, DS_Error,
DK_ResourceLimit,
MaxAddressableNumSGPRs);
Ctx.diagnose(Diag);
- MaxSGPR = MaxAddressableNumSGPRs - 1;
+ ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
}
}
// Account for extra SGPRs and VGPRs reserved for debugger use.
- MaxSGPR += ExtraSGPRs;
- MaxVGPR += ExtraVGPRs;
-
- // We found the maximum register index. They start at 0, so add one to get the
- // number of registers.
- ProgInfo.NumSGPR = MaxSGPR + 1;
- ProgInfo.NumVGPR = MaxVGPR + 1;
+ ProgInfo.NumSGPR += ExtraSGPRs;
+ ProgInfo.NumVGPR += ExtraVGPRs;
// Adjust number of registers used to meet default/requested minimum/maximum
// number of waves per execution unit request.
ProgInfo.NumSGPRsForWavesPerEU = std::max(
- ProgInfo.NumSGPR, STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
+ std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
ProgInfo.NumVGPRsForWavesPerEU = std::max(
- ProgInfo.NumVGPR, STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
+ std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
STM.hasSGPRInitBug()) {
@@ -559,10 +527,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
}
- if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
+ if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "user SGPRs",
- MFI->NumUserSGPRs, DS_Error);
+ MFI->getNumUserSGPRs(), DS_Error);
Ctx.diagnose(Diag);
}
@@ -584,7 +552,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1;
// Record first reserved VGPR and number of reserved VGPRs.
- ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
+ ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? ProgInfo.NumVGPR : 0;
ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
@@ -609,10 +577,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
ProgInfo.ScratchSize = FrameInfo.getStackSize();
- ProgInfo.FlatUsed = FlatUsed;
- ProgInfo.VCCUsed = VCCUsed;
- ProgInfo.CodeLen = CodeSize;
-
unsigned LDSAlignShift;
if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) {
// LDS is allocated in 64 dword blocks.
@@ -623,7 +587,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
}
unsigned LDSSpillSize =
- MFI->LDSWaveSpillSize * MFI->getMaxFlatWorkGroupSize();
+ MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize();
ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
ProgInfo.LDSBlocks =
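
As an illustrative aside (not part of the patch): the rewritten getSIProgramInfo derives register counts from MachineRegisterInfo usage rather than walking every instruction operand. A hedged sketch of the core scan, with MRI and RI assumed as in the hunk above:

// Find the highest-numbered VGPR the register allocator actually used.
// Scanning the class in reverse lets the loop stop at the first hit.
MCPhysReg HighestVGPR = AMDGPU::NoRegister;
for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
  if (MRI.isPhysRegUsed(Reg)) {
    HighestVGPR = Reg;
    break;
  }
}
// Hardware register indices start at 0, so the count is index + 1.
unsigned NumVGPR = HighestVGPR == AMDGPU::NoRegister
                       ? 0
                       : RI->getHWRegIndex(HighestVGPR) + 1;
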
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 13425c8b2a0f..8c86dea4b885 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -55,7 +55,7 @@ private:
uint32_t NumVGPR = 0;
uint32_t NumSGPR = 0;
- uint32_t LDSSize;
+ uint32_t LDSSize = 0;
bool FlatUsed = false;
// Number of SGPRs that meets number of waves per execution unit request.
@@ -85,11 +85,11 @@ private:
// Bonus information for debugging.
bool VCCUsed = false;
- uint64_t CodeLen = 0;
SIProgramInfo() = default;
};
+ uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 36bc2498781f..a5cda817ac11 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -415,9 +415,11 @@ public:
return 0;
}
+ // Scratch is allocated in 256 dword per wave blocks for the entire
+ // wavefront. When viewed from the perspecive of an arbitrary workitem, this
+ // is 4-byte aligned.
unsigned getStackAlignment() const {
- // Scratch is allocated in 256 dword per wave blocks.
- return 4 * 256 / getWavefrontSize();
+ return 4;
}
bool enableMachineScheduler() const override {
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 01ac9968181a..6edd3e923ba1 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -426,16 +426,23 @@ static bool isArgPassedInSGPR(const Argument *A) {
const Function *F = A->getParent();
// Arguments to compute shaders are never a source of divergence.
- if (!AMDGPU::isShader(F->getCallingConv()))
+ CallingConv::ID CC = F->getCallingConv();
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
return true;
-
- // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
- if (F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
- F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal))
- return true;
-
- // Everything else is in VGPRs.
- return false;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
+ // Everything else is in VGPRs.
+ return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
+ F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
+ default:
+ // TODO: Should calls support inreg for SGPR inputs?
+ return false;
+ }
}
///
diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td
index a9f64589fa5e..357e18108e7e 100644
--- a/lib/Target/AMDGPU/DSInstructions.td
+++ b/lib/Target/AMDGPU/DSInstructions.td
@@ -255,8 +255,6 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag>
[(set i32:$vdst,
(node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > {
- let LGKM_CNT = 0;
-
let mayLoad = 0;
let mayStore = 0;
let isConvergent = 1;
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 1655591abf39..6c61fb1f2d6b 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -14,6 +14,7 @@
using namespace llvm;
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
+ CodePointerSize = (TT.getArch() == Triple::amdgcn) ? 8 : 4;
HasSingleParameterDotFile = false;
//===------------------------------------------------------------------===//
MinInstAlignment = 4;
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 7268131396dc..dd867b15b4c7 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -461,6 +461,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i32, Expand);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand);
+ } else {
+ setOperationAction(ISD::SELECT, MVT::v2i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2f16, Custom);
+ }
+
+ for (MVT VT : { MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8 }) {
+ setOperationAction(ISD::SELECT, VT, Custom);
}
setTargetDAGCombine(ISD::FADD);
@@ -2191,6 +2198,28 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
break;
}
}
+ case ISD::SELECT: {
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+ EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
+ SDValue LHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(1));
+ SDValue RHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(2));
+
+ EVT SelectVT = NewVT;
+ if (NewVT.bitsLT(MVT::i32)) {
+ LHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, LHS);
+ RHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, RHS);
+ SelectVT = MVT::i32;
+ }
+
+ SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, SelectVT,
+ N->getOperand(0), LHS, RHS);
+
+ if (NewVT != SelectVT)
+ NewSelect = DAG.getNode(ISD::TRUNCATE, SL, NewVT, NewSelect);
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, VT, NewSelect));
+ return;
+ }
default:
break;
}
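
As an illustrative aside (not part of the patch): the new SELECT handling above legalizes small-vector selects by bitcasting both operands to an integer of the same width, selecting on that, and bitcasting back. A hedged stand-alone restatement for the v2f16 case, with SL, N, DAG and Results taken from the hunk:

// Select between two v2f16 values by performing the select on their i32 bits.
SDValue LHSBits = DAG.getNode(ISD::BITCAST, SL, MVT::i32, N->getOperand(1));
SDValue RHSBits = DAG.getNode(ISD::BITCAST, SL, MVT::i32, N->getOperand(2));
SDValue Sel = DAG.getNode(ISD::SELECT, SL, MVT::i32,
                          N->getOperand(0), LHSBits, RHSBits);
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Sel));
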
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index a84f3e274f82..810fb05984c4 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -133,14 +133,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
AMDGPUBufferPseudoSourceValue BufferPSV;
AMDGPUImagePseudoSourceValue ImagePSV;
-public:
- // FIXME: Make private
+private:
unsigned LDSWaveSpillSize;
unsigned ScratchOffsetReg;
unsigned NumUserSGPRs;
unsigned NumSystemSGPRs;
-private:
bool HasSpilledSGPRs;
bool HasSpilledVGPRs;
bool HasNonSpillStackObjects;
@@ -535,6 +533,10 @@ public:
llvm_unreachable("unexpected dimension");
}
+ unsigned getLDSWaveSpillSize() const {
+ return LDSWaveSpillSize;
+ }
+
const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
return &BufferPSV;
}
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 36d4df52ff0e..098c67252dd8 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -124,7 +124,7 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
unsigned RegCount = ST.getMaxNumSGPRs(MF);
unsigned Reg;
- // Try to place it in a hole after PrivateSegmentbufferReg.
+ // Try to place it in a hole after PrivateSegmentBufferReg.
if (RegCount & 3) {
// We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
// alignment constraints, so we have a hole where can put the wave offset.
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 57f9d1c6b610..005b74a68af3 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -67,8 +67,9 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
[FeatureFPARMv8]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
"Restrict FP to 16 double registers">;
-def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
- "Enable divide instructions">;
+def FeatureHWDivThumb : SubtargetFeature<"hwdiv", "HasHardwareDivideInThumb",
+ "true",
+ "Enable divide instructions in Thumb">;
def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
"HasHardwareDivideInARM", "true",
"Enable divide instructions in ARM mode">;
@@ -225,7 +226,7 @@ def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
def FeatureVirtualization : SubtargetFeature<"virtualization",
"HasVirtualization", "true",
"Supports Virtualization extension",
- [FeatureHWDiv, FeatureHWDivARM]>;
+ [FeatureHWDivThumb, FeatureHWDivARM]>;
// M-series ISA
def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
@@ -433,21 +434,21 @@ def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops,
def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
FeatureDB,
FeatureDSP,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureRClass]>;
def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
FeatureThumb2,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureMClass]>;
def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops,
FeatureThumb2,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureMClass,
FeatureDSP]>;
@@ -502,7 +503,7 @@ def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline",
[HasV8MBaselineOps,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureV7Clrex,
Feature8MSecExt,
FeatureAcquireRelease,
@@ -512,7 +513,7 @@ def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline",
[HasV8MMainlineOps,
FeatureNoARM,
FeatureDB,
- FeatureHWDiv,
+ FeatureHWDivThumb,
Feature8MSecExt,
FeatureAcquireRelease,
FeatureMClass]>;
@@ -678,7 +679,7 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
FeatureFP16,
FeatureAvoidPartialCPSR,
FeatureVFP4,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM]>;
def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
@@ -686,7 +687,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureNEONForFP,
FeatureVFP4,
FeatureMP,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
@@ -768,39 +769,39 @@ def : ProcNoItin<"cortex-m33", [ARMv8mMainline,
FeatureVFPOnlySP]>;
def : ProcNoItin<"cortex-a32", [ARMv8a,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC,
FeatureFPAO]>;
def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC,
FeatureFPAO]>;
def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
@@ -811,7 +812,7 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureNEONForFP,
FeatureVFP4,
FeatureMP,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
@@ -820,25 +821,25 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureZCZeroing]>;
def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"kryo", [ARMv8a, ProcKryo,
- FeatureHWDiv,
+ FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
FeatureCRC]>;
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index eb0d410b596b..14e197f477f1 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -589,12 +589,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
ATS.finishAttributeSection();
}
-static bool isV8M(const ARMSubtarget *Subtarget) {
- // Note that v8M Baseline is a subset of v6T2!
- return (Subtarget->hasV8MBaselineOps() && !Subtarget->hasV6T2Ops()) ||
- Subtarget->hasV8MMainlineOps();
-}
-
//===----------------------------------------------------------------------===//
// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
// FIXME:
@@ -602,39 +596,6 @@ static bool isV8M(const ARMSubtarget *Subtarget) {
// to appear in the .ARM.attributes section in ELF.
// Instead of subclassing the MCELFStreamer, we do the work here.
-static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
- const ARMSubtarget *Subtarget) {
- if (CPU == "xscale")
- return ARMBuildAttrs::v5TEJ;
-
- if (Subtarget->hasV8Ops()) {
- if (Subtarget->isRClass())
- return ARMBuildAttrs::v8_R;
- return ARMBuildAttrs::v8_A;
- } else if (Subtarget->hasV8MMainlineOps())
- return ARMBuildAttrs::v8_M_Main;
- else if (Subtarget->hasV7Ops()) {
- if (Subtarget->isMClass() && Subtarget->hasDSP())
- return ARMBuildAttrs::v7E_M;
- return ARMBuildAttrs::v7;
- } else if (Subtarget->hasV6T2Ops())
- return ARMBuildAttrs::v6T2;
- else if (Subtarget->hasV8MBaselineOps())
- return ARMBuildAttrs::v8_M_Base;
- else if (Subtarget->hasV6MOps())
- return ARMBuildAttrs::v6S_M;
- else if (Subtarget->hasV6Ops())
- return ARMBuildAttrs::v6;
- else if (Subtarget->hasV5TEOps())
- return ARMBuildAttrs::v5TE;
- else if (Subtarget->hasV5TOps())
- return ARMBuildAttrs::v5T;
- else if (Subtarget->hasV4TOps())
- return ARMBuildAttrs::v4T;
- else
- return ARMBuildAttrs::v4;
-}
-
// Returns true if all functions have the same function attribute value.
// It also returns true when the module has no functions.
static bool checkFunctionsAttributeConsistency(const Module &M, StringRef Attr,
@@ -671,89 +632,8 @@ void ARMAsmPrinter::emitAttributes() {
static_cast<const ARMBaseTargetMachine &>(TM);
const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian());
- const std::string &CPUString = STI.getCPUString();
-
- if (!StringRef(CPUString).startswith("generic")) {
- // FIXME: remove krait check when GNU tools support krait cpu
- if (STI.isKrait()) {
- ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
- // We consider krait as a "cortex-a9" + hwdiv CPU
- // Enable hwdiv through ".arch_extension idiv"
- if (STI.hasDivide() || STI.hasDivideInARMMode())
- ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM);
- } else
- ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
- }
-
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI));
-
- // Tag_CPU_arch_profile must have the default value of 0 when "Architecture
- // profile is not applicable (e.g. pre v7, or cross-profile code)".
- if (STI.hasV7Ops() || isV8M(&STI)) {
- if (STI.isAClass()) {
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::ApplicationProfile);
- } else if (STI.isRClass()) {
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::RealTimeProfile);
- } else if (STI.isMClass()) {
- ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::MicroControllerProfile);
- }
- }
-
- ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use,
- STI.hasARMOps() ? ARMBuildAttrs::Allowed
- : ARMBuildAttrs::Not_Allowed);
- if (isV8M(&STI)) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumbDerived);
- } else if (STI.isThumb1Only()) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
- } else if (STI.hasThumb2()) {
- ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumb32);
- }
-
- if (STI.hasNEON()) {
- /* NEON is not exactly a VFP architecture, but GAS emit one of
- * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
- if (STI.hasFPARMv8()) {
- if (STI.hasCrypto())
- ATS.emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
- else
- ATS.emitFPU(ARM::FK_NEON_FP_ARMV8);
- } else if (STI.hasVFP4())
- ATS.emitFPU(ARM::FK_NEON_VFPV4);
- else
- ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON);
- // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
- if (STI.hasV8Ops())
- ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a:
- ARMBuildAttrs::AllowNeonARMv8);
- } else {
- if (STI.hasFPARMv8())
- // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
- // FPU, but there are two different names for it depending on the CPU.
- ATS.emitFPU(STI.hasD16()
- ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16)
- : ARM::FK_FP_ARMV8);
- else if (STI.hasVFP4())
- ATS.emitFPU(STI.hasD16()
- ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16)
- : ARM::FK_VFPV4);
- else if (STI.hasVFP3())
- ATS.emitFPU(STI.hasD16()
- // +d16
- ? (STI.isFPOnlySP()
- ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD)
- : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16))
- // -d16
- : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3));
- else if (STI.hasVFP2())
- ATS.emitFPU(ARM::FK_VFPV2);
- }
+ // Emit build attributes for the available hardware.
+ ATS.emitTargetAttributes(STI);
// RW data addressing.
if (isPositionIndependent()) {
@@ -846,32 +726,15 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::AllowIEEE754);
- if (STI.allowsUnalignedMem())
- ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
- ARMBuildAttrs::Allowed);
- else
- ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
- ARMBuildAttrs::Not_Allowed);
-
// FIXME: add more flags to ARMBuildAttributes.h
// 8-bytes alignment stuff.
ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1);
ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1);
- // ABI_HardFP_use attribute to indicate single precision FP.
- if (STI.isFPOnlySP())
- ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
- ARMBuildAttrs::HardFPSinglePrecision);
-
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS);
- // FIXME: Should we signal R9 usage?
-
- if (STI.hasFP16())
- ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
-
// FIXME: To support emitting this build attribute as GCC does, the
// -mfp16-format option and associated plumbing must be
// supported. For now the __fp16 type is exposed by default, so this
@@ -879,21 +742,6 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format,
ARMBuildAttrs::FP16FormatIEEE);
- if (STI.hasMPExtension())
- ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
-
- // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
- // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
- // It is not possible to produce DisallowDIV: if hwdiv is present in the base
- // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
- // AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
- // otherwise, the default value (AllowDIVIfExists) applies.
- if (STI.hasDivideInARMMode() && !STI.hasV8Ops())
- ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
-
- if (STI.hasDSP() && isV8M(&STI))
- ATS.emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed);
-
if (MMI) {
if (const Module *SourceModule = MMI->getModule()) {
// ABI_PCS_wchar_t to indicate wchar_t width
@@ -930,16 +778,6 @@ void ARMAsmPrinter::emitAttributes() {
else
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
ARMBuildAttrs::R9IsGPR);
-
- if (STI.hasTrustZone() && STI.hasVirtualization())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowTZVirtualization);
- else if (STI.hasTrustZone())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowTZ);
- else if (STI.hasVirtualization())
- ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
- ARMBuildAttrs::AllowVirtualization);
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 23777b821f9f..faf1c631a3a7 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -404,6 +404,29 @@ public:
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
bool isSwiftFastImmShift(const MachineInstr *MI) const;
+
+ /// Returns predicate register associated with the given frame instruction.
+ unsigned getFramePred(const MachineInstr &MI) const {
+ assert(isFrameInstr(MI));
+ if (isFrameSetup(MI))
+ // Operands of ADJCALLSTACKDOWN:
+ // - argument declared in ADJCALLSTACKDOWN pattern:
+ // 0 - frame size
+ // 1 - predicate code (like ARMCC::AL)
+ // - added by predOps:
+ // 2 - predicate reg
+ return MI.getOperand(2).getReg();
+ assert(MI.getOpcode() == ARM::ADJCALLSTACKUP ||
+ MI.getOpcode() == ARM::tADJCALLSTACKUP);
+ // Operands of ADJCALLSTACKUP:
+ // - argument declared in ADJCALLSTACKUP pattern:
+ // 0 - frame size
+ // 1 - arg of CALLSEQ_END
+ // 2 - predicate code
+ // - added by predOps:
+ // 3 - predicate reg
+ return MI.getOperand(3).getReg();
+ }
};
/// Get the operands corresponding to the given \p Pred value. By default, the
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 7a7b7fede7c8..bc7afdb7f1c9 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -273,9 +273,9 @@ def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
-def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP,
- (sequence "R%u", 12, 1),
- (sequence "D%u", 31, 0))>;
+def CSR_iOS_TLSCall
+ : CalleeSavedRegs<(add LR, SP, (sub(sequence "R%u", 12, 1), R9, R12),
+ (sequence "D%u", 31, 0))>;
// C++ TLS access function saves all registers except SP. Try to match
// the order of CSRs in CSR_iOS.
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 23722f1b7f3f..6434df317aa8 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1741,10 +1741,9 @@ bool ARMConstantIslands::undoLRSpillRestore() {
.add(MI->getOperand(1));
MI->eraseFromParent();
MadeChange = true;
- }
- if (MI->getOpcode() == ARM::tPUSH &&
- MI->getOperand(2).getReg() == ARM::LR &&
- MI->getNumExplicitOperands() == 3) {
+ } else if (MI->getOpcode() == ARM::tPUSH &&
+ MI->getOperand(2).getReg() == ARM::LR &&
+ MI->getNumExplicitOperands() == 3) {
// Just remove the push.
MI->eraseFromParent();
MadeChange = true;
@@ -2158,6 +2157,15 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
// If we're in PIC mode, there should be another ADD following.
auto *TRI = STI->getRegisterInfo();
+
+ // %base cannot be redefined after the load as it will appear before
+ // TBB/TBH like:
+ // %base =
+ // %base =
+ // tBB %base, %idx
+ if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI))
+ continue;
+
if (isPositionIndependentOrROPI) {
MachineInstr *Add = Load->getNextNode();
if (Add->getOpcode() != ARM::tADDrr ||
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 01e062bd185c..e9bc7db66fa4 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -1702,7 +1702,8 @@ bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
// If we have integer div support we should have selected this automagically.
// In case we have a real miss go ahead and return false and we'll pick
// it up later.
- if (Subtarget->hasDivide()) return false;
+ if (Subtarget->hasDivideInThumbMode())
+ return false;
// Otherwise emit a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 37be22bed540..70dbe1bc5b95 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -322,6 +322,18 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
}
}
+/// We need the offset of the frame pointer relative to other MachineFrameInfo
+/// offsets which are encoded relative to SP at function begin.
+/// See also emitPrologue() for how the FP is set up.
+/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
+/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
+/// this to produce a conservative estimate that we check in an assert() later.
+static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
+ // This is a conservative estimation: Assume the frame pointer being r7 and
+ // pc("r15") up to r8 getting spilled before (= 8 registers).
+ return -AFI.getArgRegsSaveSize() - (8 * 4);
+}
+
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -432,8 +444,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
int FramePtrOffsetInPush = 0;
if (HasFP) {
- FramePtrOffsetInPush =
- MFI.getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
+ int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
+ assert(getMaxFPOffset(*MF.getFunction(), *AFI) <= FPOffset &&
+ "Max FP estimation is wrong");
+ FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
@@ -1700,6 +1714,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// worth the effort and added fragility?
unsigned EstimatedStackSize =
MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
+
+ // Determine biggest (positive) SP offset in MachineFrameInfo.
+ int MaxFixedOffset = 0;
+ for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
+ int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
+ MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
+ }
+
bool HasFP = hasFP(MF);
if (HasFP) {
if (AFI->hasStackFrame())
@@ -1707,15 +1729,20 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
} else {
// If FP is not used, SP will be used to access arguments, so count the
// size of arguments into the estimation.
- EstimatedStackSize += AFI->getArgumentStackSize();
+ EstimatedStackSize += MaxFixedOffset;
}
EstimatedStackSize += 16; // For possible paddings.
- bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) ||
- MFI.hasVarSizedObjects() ||
- (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
+ unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
+ int MaxFPOffset = getMaxFPOffset(*MF.getFunction(), *AFI);
+ bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
+ MFI.hasVarSizedObjects() ||
+ (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
+ // For large argument stacks, FP-relative addressing may overflow.
+ (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
bool ExtraCSSpill = false;
- if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+ if (BigFrameOffsets ||
+ !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
if (HasFP) {
@@ -1899,7 +1926,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// callee-saved register or reserve a special spill slot to facilitate
// register scavenging. Thumb1 needs a spill slot for stack pointer
// adjustments also, even when the frame itself is small.
- if (BigStack && !ExtraCSSpill) {
+ if (BigFrameOffsets && !ExtraCSSpill) {
// If any non-reserved CS register isn't spilled, just spill one or two
// extra. That should take care of it!
unsigned NumExtras = TargetAlign / 4;
@@ -1958,7 +1985,7 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
// ADJCALLSTACKUP -> add, sp, sp, amount
MachineInstr &Old = *I;
DebugLoc dl = Old.getDebugLoc();
- unsigned Amount = Old.getOperand(0).getImm();
+ unsigned Amount = TII.getFrameSize(Old);
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
@@ -1976,14 +2003,11 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
ARMCC::CondCodes Pred =
(PIdx == -1) ? ARMCC::AL
: (ARMCC::CondCodes)Old.getOperand(PIdx).getImm();
+ unsigned PredReg = TII.getFramePred(Old);
if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
- unsigned PredReg = Old.getOperand(2).getReg();
emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
Pred, PredReg);
} else {
- // Note: PredReg is operand 3 for ADJCALLSTACKUP.
- unsigned PredReg = Old.getOperand(3).getReg();
assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
Pred, PredReg);
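
The getMaxFPOffset() bound and the BigFrameOffsets test added above reduce to small integer arithmetic: the frame pointer can end up at most the argument-register save area plus eight 4-byte spills below the incoming SP, and FP-relative addressing is only trusted when the largest fixed-object offset stays within the register-scavenging limit of that worst-case FP position. A minimal standalone sketch of that arithmetic (plain C++, not the backend code; the condition is simplified to the parts shown in this hunk and the names are illustrative):

  #include <cassert>

  // Worst-case (most negative) SP-relative offset of the frame pointer:
  // the argument-register save area plus up to 8 GPR spills of 4 bytes each.
  int maxFPOffset(int argRegsSaveSize) {
    return -argRegsSaveSize - 8 * 4;
  }

  // Simplified version of the BigFrameOffsets decision; the scavenging limit
  // is a parameter here, whereas the backend gets it from
  // estimateRSStackSizeLimit().
  bool bigFrameOffsets(unsigned estimatedStackSize, unsigned rsLimit,
                       bool hasVarSizedObjects, bool hasFP,
                       int maxFixedOffset, int maxFPOff) {
    return estimatedStackSize >= rsLimit || hasVarSizedObjects ||
           (hasFP && (maxFixedOffset - maxFPOff) >= (int)rsLimit);
  }

  int main() {
    // 16 bytes of register arguments saved to the stack: FP sits at most
    // 48 bytes below the incoming SP.
    assert(maxFPOffset(16) == -48);
    // 1 KiB of fixed (argument) objects vs. a 1020-byte limit: FP-relative
    // offsets could overflow, so a stack frame is forced.
    assert(bigFrameOffsets(200, 1020, false, true, 1024, -48));
    return 0;
  }
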
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index b07b4e1f5cfb..e9df9449103c 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -228,11 +228,6 @@ private:
const uint16_t *DOpcodes,
const uint16_t *QOpcodes = nullptr);
- /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
- /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
- /// generated to force the table registers to be consecutive.
- void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
-
/// Try to select SBFX/UBFX instructions for ARM.
bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
@@ -544,11 +539,11 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
SDValue NewMulConst;
if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
HandleSDNode Handle(N);
+ SDLoc Loc(N);
replaceDAGValue(N.getOperand(1), NewMulConst);
BaseReg = Handle.getValue();
- Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
- PowerOfTwo),
- SDLoc(N), MVT::i32);
+ Opc = CurDAG->getTargetConstant(
+ ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
return true;
}
}
@@ -1859,6 +1854,14 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
return Opc; // If not one we handle, return it unchanged.
}
+/// Returns true if the given increment is a Constant known to be equal to the
+/// access size performed by a NEON load/store. This means the "[rN]!" form can
+/// be used.
+static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
+ auto C = dyn_cast<ConstantSDNode>(Inc);
+ return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
+}
+
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0,
@@ -1926,13 +1929,13 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
+ bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
+ if ((NumVecs <= 2) && !IsImmUpdate)
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
- !isa<ConstantSDNode>(Inc.getNode()))
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate)
+ Ops.push_back(IsImmUpdate ? Reg0 : Inc);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -2080,11 +2083,12 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue Inc = N->getOperand(AddrOpIdx + 1);
// FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
+ if (NumVecs <= 2 && !IsImmUpdate)
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if (!isa<ConstantSDNode>(Inc.getNode()))
+ if (!IsImmUpdate)
Ops.push_back(Inc);
else if (NumVecs > 2 && !isVSTfixed(Opc))
Ops.push_back(Reg0);
@@ -2214,7 +2218,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ bool IsImmUpdate =
+ isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
+ Ops.push_back(IsImmUpdate ? Reg0 : Inc);
}
SDValue SuperReg;
@@ -2318,9 +2324,11 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
// fixed-stride update instructions don't have an explicit writeback
// operand. It's implicit in the opcode itself.
SDValue Inc = N->getOperand(2);
- if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ bool IsImmUpdate =
+ isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
+ if (NumVecs <= 2 && !IsImmUpdate)
Opc = getVLDSTRegisterUpdateOpcode(Opc);
- if (!isa<ConstantSDNode>(Inc.getNode()))
+ if (!IsImmUpdate)
Ops.push_back(Inc);
// FIXME: VLD3 and VLD4 haven't been updated to that form yet.
else if (NumVecs > 2)
@@ -2356,39 +2364,6 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
CurDAG->RemoveDeadNode(N);
}
-void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
- unsigned Opc) {
- assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- unsigned FirstTblReg = IsExt ? 2 : 1;
-
- // Form a REG_SEQUENCE to force register allocation.
- SDValue RegSeq;
- SDValue V0 = N->getOperand(FirstTblReg + 0);
- SDValue V1 = N->getOperand(FirstTblReg + 1);
- if (NumVecs == 2)
- RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
- else {
- SDValue V2 = N->getOperand(FirstTblReg + 2);
- // If it's a vtbl3, form a quad D-register and leave the last part as
- // an undef.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
- : N->getOperand(FirstTblReg + 3);
- RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
- SmallVector<SDValue, 6> Ops;
- if (IsExt)
- Ops.push_back(N->getOperand(1));
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
- Ops.push_back(getAL(CurDAG, dl)); // predicate
- Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
- ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
-}
-
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
if (!Subtarget->hasV6T2Ops())
return false;
@@ -3730,59 +3705,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
break;
}
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IntNo) {
- default:
- break;
-
- case Intrinsic::arm_neon_vtbl2:
- SelectVTBL(N, false, 2, ARM::VTBL2);
- return;
- case Intrinsic::arm_neon_vtbl3:
- SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
- return;
- case Intrinsic::arm_neon_vtbl4:
- SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
- return;
-
- case Intrinsic::arm_neon_vtbx2:
- SelectVTBL(N, true, 2, ARM::VTBX2);
- return;
- case Intrinsic::arm_neon_vtbx3:
- SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
- return;
- case Intrinsic::arm_neon_vtbx4:
- SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
- return;
- }
- break;
- }
-
- case ARMISD::VTBL1: {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), // Predicate
- CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
- ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
- return;
- }
- case ARMISD::VTBL2: {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
- // Form a REG_SEQUENCE to force register allocation.
- SDValue V0 = N->getOperand(0);
- SDValue V1 = N->getOperand(1);
- SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
-
- SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
- CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
- ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
- return;
- }
-
case ISD::ATOMIC_CMP_SWAP:
SelectCMP_SWAP(N);
return;
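
isPerfectIncrement() only accepts a constant post-increment that exactly matches the number of bytes the NEON load/store moves, which is what allows the "[rN]!" writeback form. A standalone sketch of that size check, using plain integers instead of SDValue/EVT (C++17, illustrative names only):

  #include <cassert>
  #include <optional>

  // A constant increment is "perfect" when it equals the access size:
  // (vector size in bits / 8) bytes per register, times the number of
  // vectors accessed by the VLDn/VSTn.
  bool isPerfectIncrement(std::optional<unsigned> constInc,
                          unsigned vecSizeInBits, unsigned numVecs) {
    return constInc && *constInc == vecSizeInBits / 8 * numVecs;
  }

  int main() {
    // vld2 of two 64-bit D registers moves 16 bytes.
    assert(isPerfectIncrement(16u, 64, 2));
    // An 8-byte increment would need the register-writeback form instead.
    assert(!isPerfectIncrement(8u, 64, 2));
    // A non-constant increment never qualifies.
    assert(!isPerfectIncrement(std::nullopt, 64, 2));
    return 0;
  }
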
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index e697c8ca5339..165e9b7378c7 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -852,7 +852,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide()
+ bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
: Subtarget->hasDivideInARMMode();
if (!hasDivide) {
// These are expanded into libcalls if the cpu doesn't have HW divider.
@@ -860,7 +860,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i32, LibCall);
}
- if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) {
+ if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
setOperationAction(ISD::SDIV, MVT::i32, Custom);
setOperationAction(ISD::UDIV, MVT::i32, Custom);
@@ -2633,7 +2633,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return true;
}
-bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!Subtarget->supportsTailCall())
return false;
@@ -3347,6 +3347,12 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::arm_neon_vtbl1:
+ return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::arm_neon_vtbl2:
+ return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
}
}
@@ -10867,11 +10873,8 @@ static SDValue CombineBaseUpdate(SDNode *N,
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
- if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
- uint64_t IncVal = CInc->getZExtValue();
- if (IncVal != NumBytes)
- continue;
- } else if (NumBytes >= 3 * 16) {
+ ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
+ if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
// VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
// separate instructions that make it harder to use a non-constant update.
continue;
@@ -11688,34 +11691,6 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
- APInt &KnownOne) {
- if (Op.getOpcode() == ARMISD::BFI) {
- // Conservatively, we can recurse down the first operand
- // and just mask out all affected bits.
- computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
-
- // The operand to BFI is already a mask suitable for removing the bits it
- // sets.
- ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
- const APInt &Mask = CI->getAPIntValue();
- KnownZero &= Mask;
- KnownOne &= Mask;
- return;
- }
- if (Op.getOpcode() == ARMISD::CMOV) {
- APInt KZ2(KnownZero.getBitWidth(), 0);
- APInt KO2(KnownOne.getBitWidth(), 0);
- computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
- computeKnownBits(DAG, Op.getOperand(1), KZ2, KO2);
-
- KnownZero &= KZ2;
- KnownOne &= KO2;
- return;
- }
- return DAG.computeKnownBits(Op, KnownZero, KnownOne);
-}
-
SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
// If we have a CMOV, OR and AND combination such as:
// if (x & CN)
@@ -11777,7 +11752,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
// Lastly, can we determine that the bits defined by OrCI
// are zero in Y?
APInt KnownZero, KnownOne;
- computeKnownBits(DAG, Y, KnownZero, KnownOne);
+ DAG.computeKnownBits(Y, KnownZero, KnownOne);
if ((OrCI & KnownZero) != OrCI)
return SDValue();
@@ -12657,6 +12632,19 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
}
}
+ case ARMISD::BFI: {
+ // Conservatively, we can recurse down the first operand
+ // and just mask out all affected bits.
+ DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1);
+
+ // The operand to BFI is already a mask suitable for removing the bits it
+ // sets.
+ ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
+ const APInt &Mask = CI->getAPIntValue();
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+ return;
+ }
}
}
@@ -13052,7 +13040,9 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
// rem = a - b * div
// return {div, rem}
// This should be lowered into UDIV/SDIV + MLS later on.
- if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() &&
+ bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
+ : Subtarget->hasDivideInARMMode();
+ if (hasDivide && Op->getValueType(0).isSimple() &&
Op->getSimpleValueType(0) == MVT::i32) {
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
const SDValue Dividend = Op->getOperand(0);
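
The LowerDivRem comment above describes the identity the lowering relies on: once a hardware divide is available, {div, rem} becomes a divide plus a multiply-subtract (SDIV/UDIV followed by MLS). A tiny sketch of that identity in plain C++ (C++17 for the structured binding; not the backend code):

  #include <cassert>
  #include <utility>

  // rem = a - b * (a / b), i.e. SDIV/UDIV + MLS.
  std::pair<int, int> divRem(int a, int b) {
    int div = a / b;        // SDIV
    int rem = a - b * div;  // MLS
    return {div, rem};
  }

  int main() {
    auto [d, r] = divRem(23, 5);
    assert(d == 4 && r == 3);
    return 0;
  }
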
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 70a0b1380ec9..8b54ce430ed2 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -717,7 +717,7 @@ class InstrItineraryData;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
- bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue CCR, SDValue Cmp,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index cc0e7d4d9c35..703e8071b177 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -259,8 +259,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float conversions">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16","full half-float">;
-def HasDivide : Predicate<"Subtarget->hasDivide()">,
- AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
+def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
+ AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
def HasDSP : Predicate<"Subtarget->hasDSP()">,
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 681e235d78f0..9b08c612e16b 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -587,6 +587,14 @@ def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
+ SDTCisVT<2, v8i8>]>;
+def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
+ SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
+def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
+def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
+
+
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
@@ -6443,7 +6451,8 @@ def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
(ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
"vtbl", "8", "$Vd, $Vn, $Vm", "",
- [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
+ [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
+
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
@@ -6498,6 +6507,49 @@ def VTBX4Pseudo
IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"
+def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
+ (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1),
+ v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vm)),
+ (v8i8 (VTBX2 v8i8:$orig,
+ (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1),
+ v8i8:$Vm))>;
+
+def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vm)),
+ (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ (v8i8 (IMPLICIT_DEF)), dsub_3),
+ v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vm)),
+ (v8i8 (VTBX3Pseudo v8i8:$orig,
+ (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ (v8i8 (IMPLICIT_DEF)), dsub_3),
+ v8i8:$Vm))>;
+
+def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
+ (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ v8i8:$Vn3, dsub_3),
+ v8i8:$Vm))>;
+def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
+ v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
+ (v8i8 (VTBX4Pseudo v8i8:$orig,
+ (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
+ v8i8:$Vn1, dsub_1,
+ v8i8:$Vn2, dsub_2,
+ v8i8:$Vn3, dsub_3),
+ v8i8:$Vm))>;
+
// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index f5b673b78ad7..f710ee6a7e77 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -2797,7 +2797,7 @@ def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">;
def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb, HasV8MBaseline]>,
+ Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>,
Sched<[WriteDIV]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
@@ -2809,7 +2809,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb, HasV8MBaseline]>,
+ Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>,
Sched<[WriteDIV]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 8d224d6a70fa..816596b85721 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -299,6 +299,20 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
I.setDesc(TII.get(ARM::ADDrr));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
break;
+ case G_SUB:
+ I.setDesc(TII.get(ARM::SUBrr));
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+ break;
+ case G_MUL:
+ if (TII.getSubtarget().hasV6Ops()) {
+ I.setDesc(TII.get(ARM::MUL));
+ } else {
+ assert(TII.getSubtarget().useMulOps() && "Unsupported target");
+ I.setDesc(TII.get(ARM::MULv5));
+ MIB->getOperand(0).setIsEarlyClobber(true);
+ }
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+ break;
case G_FADD:
if (!selectFAdd(MIB, TII, MRI))
return false;
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 994bbd673dd8..fe9681439e6b 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -43,8 +43,9 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, 1, p0}, Legal);
}
- for (auto Ty : {s1, s8, s16, s32})
- setAction({G_ADD, Ty}, Legal);
+ for (unsigned Op : {G_ADD, G_SUB, G_MUL})
+ for (auto Ty : {s1, s8, s16, s32})
+ setAction({Op, Ty}, Legal);
for (unsigned Op : {G_SEXT, G_ZEXT}) {
setAction({Op, s32}, Legal);
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 08f3da738868..e47bd3a8963e 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -219,6 +219,8 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
switch (Opc) {
case G_ADD:
+ case G_SUB:
+ case G_MUL:
case G_SEXT:
case G_ZEXT:
case G_GEP:
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 40993fc0aa8a..d2630685d91b 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -208,8 +208,8 @@ protected:
/// FP registers for VFPv3.
bool HasD16 = false;
- /// HasHardwareDivide - True if subtarget supports [su]div
- bool HasHardwareDivide = false;
+ /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode
+ bool HasHardwareDivideInThumb = false;
/// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
bool HasHardwareDivideInARM = false;
@@ -507,7 +507,7 @@ public:
return hasNEON() && UseNEONForSinglePrecisionFP;
}
- bool hasDivide() const { return HasHardwareDivide; }
+ bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; }
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
bool hasDataBarrier() const { return HasDataBarrier; }
bool hasV7Clrex() const { return HasV7Clrex; }
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index f421d3ac1693..ada816c16389 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -67,6 +67,9 @@ static cl::opt<ImplicitItModeTy> ImplicitItMode(
clEnumValN(ImplicitItModeTy::ThumbOnly, "thumb",
"Warn in ARM, emit implicit ITs in Thumb")));
+static cl::opt<bool> AddBuildAttributes("arm-add-build-attributes",
+ cl::init(false));
+
class ARMOperand;
enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
@@ -540,6 +543,10 @@ public:
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ // Add build attributes based on the selected target.
+ if (AddBuildAttributes)
+ getTargetStreamer().emitTargetAttributes(STI);
+
// Not in an ITBlock to start with.
ITState.CurPosition = ~0U;
@@ -10189,8 +10196,8 @@ static const struct {
{ ARM::AEK_CRYPTO, Feature_HasV8,
{ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
{ ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} },
- { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
- {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} },
+ { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
+ {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} },
{ ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
{ ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
{ ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} },
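
Assuming the new cl::opt is reachable through the usual MC command-line plumbing, the option would typically be exercised along these lines (both invocations are illustrative; only the -arm-add-build-attributes flag itself is introduced by this change):

  llvm-mc -triple=armv7a-linux-gnueabi -filetype=obj -arm-add-build-attributes input.s -o input.o
  llvm-readobj -arm-attributes input.o
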
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6fa890ba1cd5..4d6c52f3cd49 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -464,7 +464,7 @@ public:
void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes);
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
- LastMappingSymbols[getPreviousSection().first] = std::move(LastEMSInfo);
+ LastMappingSymbols[getCurrentSection().first] = std::move(LastEMSInfo);
MCELFStreamer::ChangeSection(Section, Subsection);
auto LastMappingSymbol = LastMappingSymbols.find(Section);
if (LastMappingSymbol != LastMappingSymbols.end()) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 73e563890dd9..2b0cd461df7a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -11,9 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMTargetMachine.h"
#include "llvm/MC/ConstantPools.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
@@ -75,3 +79,179 @@ void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
void
ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
+
+static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) {
+ if (STI.getCPU() == "xscale")
+ return ARMBuildAttrs::v5TEJ;
+
+ if (STI.hasFeature(ARM::HasV8Ops)) {
+ if (STI.hasFeature(ARM::FeatureRClass))
+ return ARMBuildAttrs::v8_R;
+ return ARMBuildAttrs::v8_A;
+ } else if (STI.hasFeature(ARM::HasV8MMainlineOps))
+ return ARMBuildAttrs::v8_M_Main;
+ else if (STI.hasFeature(ARM::HasV7Ops)) {
+ if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP))
+ return ARMBuildAttrs::v7E_M;
+ return ARMBuildAttrs::v7;
+ } else if (STI.hasFeature(ARM::HasV6T2Ops))
+ return ARMBuildAttrs::v6T2;
+ else if (STI.hasFeature(ARM::HasV8MBaselineOps))
+ return ARMBuildAttrs::v8_M_Base;
+ else if (STI.hasFeature(ARM::HasV6MOps))
+ return ARMBuildAttrs::v6S_M;
+ else if (STI.hasFeature(ARM::HasV6Ops))
+ return ARMBuildAttrs::v6;
+ else if (STI.hasFeature(ARM::HasV5TEOps))
+ return ARMBuildAttrs::v5TE;
+ else if (STI.hasFeature(ARM::HasV5TOps))
+ return ARMBuildAttrs::v5T;
+ else if (STI.hasFeature(ARM::HasV4TOps))
+ return ARMBuildAttrs::v4T;
+ else
+ return ARMBuildAttrs::v4;
+}
+
+static bool isV8M(const MCSubtargetInfo &STI) {
+ // Note that v8M Baseline is a subset of v6T2!
+ return (STI.hasFeature(ARM::HasV8MBaselineOps) &&
+ !STI.hasFeature(ARM::HasV6T2Ops)) ||
+ STI.hasFeature(ARM::HasV8MMainlineOps);
+}
+
+/// Emit the build attributes that only depend on the hardware that we expect
+/// to be available, and not on the ABI or any source-language choices.
+void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
+ switchVendor("aeabi");
+
+ const StringRef CPUString = STI.getCPU();
+ if (!CPUString.empty() && !CPUString.startswith("generic")) {
+ // FIXME: remove krait check when GNU tools support krait cpu
+ if (STI.hasFeature(ARM::ProcKrait)) {
+ emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
+ // We consider krait as a "cortex-a9" + hwdiv CPU
+ // Enable hwdiv through ".arch_extension idiv"
+ if (STI.hasFeature(ARM::FeatureHWDivThumb) ||
+ STI.hasFeature(ARM::FeatureHWDivARM))
+ emitArchExtension(ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM);
+ } else {
+ emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+ }
+ }
+
+ emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(STI));
+
+ if (STI.hasFeature(ARM::FeatureAClass)) {
+ emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ } else if (STI.hasFeature(ARM::FeatureRClass)) {
+ emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::RealTimeProfile);
+ } else if (STI.hasFeature(ARM::FeatureMClass)) {
+ emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::MicroControllerProfile);
+ }
+
+ emitAttribute(ARMBuildAttrs::ARM_ISA_use, STI.hasFeature(ARM::FeatureNoARM)
+ ? ARMBuildAttrs::Not_Allowed
+ : ARMBuildAttrs::Allowed);
+
+ if (isV8M(STI)) {
+ emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumbDerived);
+ } else if (STI.hasFeature(ARM::FeatureThumb2)) {
+ emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+ } else if (STI.hasFeature(ARM::HasV4TOps)) {
+ emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed);
+ }
+
+ if (STI.hasFeature(ARM::FeatureNEON)) {
+ /* NEON is not exactly a VFP architecture, but GAS emits one of
+ * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (STI.hasFeature(ARM::FeatureFPARMv8)) {
+ if (STI.hasFeature(ARM::FeatureCrypto))
+ emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8);
+ else
+ emitFPU(ARM::FK_NEON_FP_ARMV8);
+ } else if (STI.hasFeature(ARM::FeatureVFP4))
+ emitFPU(ARM::FK_NEON_VFPV4);
+ else
+ emitFPU(STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_NEON_FP16
+ : ARM::FK_NEON);
+ // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
+ if (STI.hasFeature(ARM::HasV8Ops))
+ emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ STI.hasFeature(ARM::HasV8_1aOps)
+ ? ARMBuildAttrs::AllowNeonARMv8_1a
+ : ARMBuildAttrs::AllowNeonARMv8);
+ } else {
+ if (STI.hasFeature(ARM::FeatureFPARMv8))
+ // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
+ // FPU, but there are two different names for it depending on the CPU.
+ emitFPU(STI.hasFeature(ARM::FeatureD16)
+ ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16
+ : ARM::FK_FPV5_D16)
+ : ARM::FK_FP_ARMV8);
+ else if (STI.hasFeature(ARM::FeatureVFP4))
+ emitFPU(STI.hasFeature(ARM::FeatureD16)
+ ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16
+ : ARM::FK_VFPV4_D16)
+ : ARM::FK_VFPV4);
+ else if (STI.hasFeature(ARM::FeatureVFP3))
+ emitFPU(
+ STI.hasFeature(ARM::FeatureD16)
+ // +d16
+ ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
+ ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
+ : ARM::FK_VFPV3XD)
+ : (STI.hasFeature(ARM::FeatureFP16)
+ ? ARM::FK_VFPV3_D16_FP16
+ : ARM::FK_VFPV3_D16))
+ // -d16
+ : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
+ : ARM::FK_VFPV3));
+ else if (STI.hasFeature(ARM::FeatureVFP2))
+ emitFPU(ARM::FK_VFPV2);
+ }
+
+ // ABI_HardFP_use attribute to indicate single precision FP.
+ if (STI.hasFeature(ARM::FeatureVFPOnlySP))
+ emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
+ ARMBuildAttrs::HardFPSinglePrecision);
+
+ if (STI.hasFeature(ARM::FeatureFP16))
+ emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+
+ if (STI.hasFeature(ARM::FeatureMP))
+ emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
+
+ // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
+ // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
+ // It is not possible to produce DisallowDIV: if hwdiv is present in the base
+ // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits.
+ // AllowDIVExt is only emitted if hwdiv isn't available in the base arch;
+ // otherwise, the default value (AllowDIVIfExists) applies.
+ if (STI.hasFeature(ARM::FeatureHWDivARM) && !STI.hasFeature(ARM::HasV8Ops))
+ emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
+
+ if (STI.hasFeature(ARM::FeatureDSP) && isV8M(STI))
+ emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed);
+
+ if (STI.hasFeature(ARM::FeatureStrictAlign))
+ emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Not_Allowed);
+ else
+ emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Allowed);
+
+ if (STI.hasFeature(ARM::FeatureTrustZone) &&
+ STI.hasFeature(ARM::FeatureVirtualization))
+ emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZVirtualization);
+ else if (STI.hasFeature(ARM::FeatureTrustZone))
+ emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZ);
+ else if (STI.hasFeature(ARM::FeatureVirtualization))
+ emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowVirtualization);
+}
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index fc083b98395b..d0fd366ab9ed 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -83,13 +83,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// ADJCALLSTACKUP -> add, sp, sp, amount
MachineInstr &Old = *I;
DebugLoc dl = Old.getDebugLoc();
- unsigned Amount = Old.getOperand(0).getImm();
+ unsigned Amount = TII.getFrameSize(Old);
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned Align = getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
+ Amount = alignTo(Amount, getStackAlignment());
// Replace the pseudo instruction with a new instruction...
unsigned Opc = Old.getOpcode();
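
alignTo() replaces the open-coded round-up; for the power-of-two stack alignments used here the two are equivalent. A standalone sketch of that identity (plain C++, not the LLVM helper):

  #include <cassert>

  // Round amount up to the next multiple of align (align > 0).
  unsigned alignTo(unsigned amount, unsigned align) {
    return (amount + align - 1) / align * align;
  }

  int main() {
    assert(alignTo(0, 8) == 0);
    assert(alignTo(1, 8) == 8);
    assert(alignTo(20, 8) == 24);  // outgoing-argument area padded to 24 bytes
    assert(alignTo(24, 8) == 24);
    return 0;
  }
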
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
index 9f2ee8cf8035..535bb012eb07 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
@@ -18,7 +18,7 @@
namespace llvm {
AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT) {
- PointerSize = 2;
+ CodePointerSize = 2;
CalleeSaveStackSlotSize = 2;
CommentString = ";";
PrivateGlobalPrefix = ".L";
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 559ac291a79e..fd7c97bf1f0a 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -42,7 +42,7 @@ public:
// messed up in random places by 4 bytes. .debug_line
// section will be parsable, but with odd offsets and
// line numbers, etc.
- PointerSize = 8;
+ CodePointerSize = 8;
}
};
}
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
index 61d3630ac095..cb3049bf1500 100644
--- a/lib/Target/Hexagon/BitTracker.cpp
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -1011,12 +1011,7 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
bool BT::reached(const MachineBasicBlock *B) const {
int BN = B->getNumber();
assert(BN >= 0);
- for (EdgeSetType::iterator I = EdgeExec.begin(), E = EdgeExec.end();
- I != E; ++I) {
- if (I->second == BN)
- return true;
- }
- return false;
+ return ReachedBB.count(BN);
}
// Visit an individual instruction. This could be a newly added instruction,
@@ -1036,6 +1031,8 @@ void BT::reset() {
EdgeExec.clear();
InstrExec.clear();
Map.clear();
+ ReachedBB.clear();
+ ReachedBB.reserve(MF.size());
}
void BT::run() {
@@ -1068,6 +1065,7 @@ void BT::run() {
if (EdgeExec.count(Edge))
continue;
EdgeExec.insert(Edge);
+ ReachedBB.insert(Edge.second);
const MachineBasicBlock &B = *MF.getBlockNumbered(Edge.second);
MachineBasicBlock::const_iterator It = B.begin(), End = B.end();
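
reached() used to scan every executable edge; the patch keeps a side set of reached block numbers so the query becomes a hash lookup. A minimal sketch of the same caching idea with standard containers (illustrative names, not the BitTracker types):

  #include <cassert>
  #include <set>
  #include <unordered_set>
  #include <utility>

  struct EdgeTracker {
    std::set<std::pair<int, int>> edgeExec;  // executable CFG edges
    std::unordered_set<int> reachedBB;       // cache of reached block numbers

    void markExecutable(int from, int to) {
      if (edgeExec.insert({from, to}).second)
        reachedBB.insert(to);                // keep the cache in sync
    }
    // Average O(1) lookup instead of scanning every edge.
    bool reached(int block) const { return reachedBB.count(block) != 0; }
  };

  int main() {
    EdgeTracker t;
    t.markExecutable(0, 1);
    t.markExecutable(1, 3);
    assert(t.reached(3) && !t.reached(2));
    return 0;
  }
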
diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h
index a547b34e852f..7f49f430382d 100644
--- a/lib/Target/Hexagon/BitTracker.h
+++ b/lib/Target/Hexagon/BitTracker.h
@@ -10,6 +10,7 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
#define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -68,10 +69,11 @@ private:
typedef std::set<const MachineInstr *> InstrSetType;
typedef std::queue<CFGEdge> EdgeQueueType;
- EdgeSetType EdgeExec; // Executable flow graph edges.
- InstrSetType InstrExec; // Executable instructions.
- EdgeQueueType FlowQ; // Work queue of CFG edges.
- bool Trace; // Enable tracing for debugging.
+ EdgeSetType EdgeExec; // Executable flow graph edges.
+ InstrSetType InstrExec; // Executable instructions.
+ EdgeQueueType FlowQ; // Work queue of CFG edges.
+ DenseSet<unsigned> ReachedBB; // Cache of reached blocks.
+ bool Trace; // Enable tracing for debugging.
const MachineEvaluator &ME;
MachineFunction &MF;
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 418dd71aeb4b..e5eb059b566f 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -635,7 +635,7 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
}
-bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// If either no tail call or told not to tail call at all, don't.
auto Attr =
CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index fb8f0ba6b057..1415156487c0 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -195,7 +195,7 @@ namespace HexagonISD {
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const override;
- bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index b243de317dc5..27b40f134b1f 100644
--- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -35,7 +35,6 @@
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
-#include <map>
static cl::opt<int> CodeGrowthLimit("hexagon-amode-growth-limit",
cl::Hidden, cl::init(0), cl::desc("Code growth limit for address mode "
@@ -45,10 +44,8 @@ using namespace llvm;
using namespace rdf;
namespace llvm {
-
FunctionPass *createHexagonOptAddrMode();
- void initializeHexagonOptAddrModePass(PassRegistry &);
-
+ void initializeHexagonOptAddrModePass(PassRegistry&);
} // end namespace llvm
namespace {
@@ -59,10 +56,7 @@ public:
HexagonOptAddrMode()
: MachineFunctionPass(ID), HII(nullptr), MDT(nullptr), DFG(nullptr),
- LV(nullptr) {
- PassRegistry &R = *PassRegistry::getPassRegistry();
- initializeHexagonOptAddrModePass(R);
- }
+ LV(nullptr) {}
StringRef getPassName() const override {
return "Optimize addressing mode of load/store";
@@ -84,7 +78,6 @@ private:
MachineDominatorTree *MDT;
DataFlowGraph *DFG;
DataFlowGraph::DefStackMap DefM;
- std::map<RegisterRef, std::map<NodeId, NodeId>> RDefMap;
Liveness *LV;
MISetType Deleted;
@@ -99,8 +92,6 @@ private:
void getAllRealUses(NodeAddr<StmtNode *> SN, NodeList &UNodeList);
bool allValidCandidates(NodeAddr<StmtNode *> SA, NodeList &UNodeList);
short getBaseWithLongOffset(const MachineInstr &MI) const;
- void updateMap(NodeAddr<InstrNode *> IA);
- bool constructDefMap(MachineBasicBlock *B);
bool changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
unsigned ImmOpNum);
bool changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, unsigned ImmOpNum);
@@ -112,11 +103,11 @@ private:
char HexagonOptAddrMode::ID = 0;
-INITIALIZE_PASS_BEGIN(HexagonOptAddrMode, "opt-amode",
+INITIALIZE_PASS_BEGIN(HexagonOptAddrMode, "amode-opt",
"Optimize addressing mode", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
-INITIALIZE_PASS_END(HexagonOptAddrMode, "opt-amode", "Optimize addressing mode",
+INITIALIZE_PASS_END(HexagonOptAddrMode, "amode-opt", "Optimize addressing mode",
false, false)
bool HexagonOptAddrMode::hasRepForm(MachineInstr &MI, unsigned TfrDefR) {
@@ -173,8 +164,11 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) {
NodeAddr<UseNode *> UA = *I;
NodeAddr<InstrNode *> IA = UA.Addr->getOwner(*DFG);
- if ((UA.Addr->getFlags() & NodeAttrs::PhiRef) ||
- RDefMap[OffsetRR][IA.Id] != OffsetRegRD)
+ if (UA.Addr->getFlags() & NodeAttrs::PhiRef)
+ return false;
+ NodeAddr<RefNode*> AA = LV->getNearestAliasedRef(OffsetRR, IA);
+ if ((DFG->IsDef(AA) && AA.Id != OffsetRegRD) ||
+ AA.Addr->getReachingDef() != OffsetRegRD)
return false;
MachineInstr &UseMI = *NodeAddr<StmtNode *>(IA).Addr->getCode();
@@ -486,14 +480,14 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN,
MIB.add(AddAslMI->getOperand(2));
MIB.add(AddAslMI->getOperand(3));
const GlobalValue *GV = ImmOp.getGlobal();
- MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm(),
+ MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm()+ImmOp.getOffset(),
ImmOp.getTargetFlags());
OpStart = 3;
} else if (UseMID.mayStore()) {
MIB.add(AddAslMI->getOperand(2));
MIB.add(AddAslMI->getOperand(3));
const GlobalValue *GV = ImmOp.getGlobal();
- MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm(),
+ MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm()+ImmOp.getOffset(),
ImmOp.getTargetFlags());
MIB.add(UseMI->getOperand(2));
OpStart = 3;
@@ -597,46 +591,6 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
return Changed;
}
-void HexagonOptAddrMode::updateMap(NodeAddr<InstrNode *> IA) {
- RegisterSet RRs;
- for (NodeAddr<RefNode *> RA : IA.Addr->members(*DFG))
- RRs.insert(RA.Addr->getRegRef(*DFG));
- bool Common = false;
- for (auto &R : RDefMap) {
- if (!RRs.count(R.first))
- continue;
- Common = true;
- break;
- }
- if (!Common)
- return;
-
- for (auto &R : RDefMap) {
- auto F = DefM.find(R.first.Reg);
- if (F == DefM.end() || F->second.empty())
- continue;
- R.second[IA.Id] = F->second.top()->Id;
- }
-}
-
-bool HexagonOptAddrMode::constructDefMap(MachineBasicBlock *B) {
- bool Changed = false;
- auto BA = DFG->getFunc().Addr->findBlock(B, *DFG);
- DFG->markBlock(BA.Id, DefM);
-
- for (NodeAddr<InstrNode *> IA : BA.Addr->members(*DFG)) {
- updateMap(IA);
- DFG->pushAllDefs(IA, DefM);
- }
-
- MachineDomTreeNode *N = MDT->getNode(B);
- for (auto I : *N)
- Changed |= constructDefMap(I->getBlock());
-
- DFG->releaseBlock(BA.Id, DefM);
- return Changed;
-}
-
bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -658,8 +612,6 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
L.computePhiInfo();
LV = &L;
- constructDefMap(&DFG->getMF().front());
-
Deleted.clear();
NodeAddr<FuncNode *> FA = DFG->getFunc();
DEBUG(dbgs() << "==== [RefMap#]=====:\n "
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 06fc9195fa67..6913d50bbcaa 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -111,6 +111,7 @@ namespace llvm {
extern char &HexagonExpandCondsetsID;
void initializeHexagonExpandCondsetsPass(PassRegistry&);
void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
+ void initializeHexagonOptAddrModePass(PassRegistry&);
Pass *createHexagonLoopIdiomPass();
FunctionPass *createHexagonBitSimplify();
@@ -152,6 +153,7 @@ extern "C" void LLVMInitializeHexagonTarget() {
// Register the target.
RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ initializeHexagonOptAddrModePass(*PassRegistry::getPassRegistry());
}
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp
index 57ce9fabc5e3..ea86ffba58f6 100644
--- a/lib/Target/Hexagon/RDFCopy.cpp
+++ b/lib/Target/Hexagon/RDFCopy.cpp
@@ -59,7 +59,7 @@ void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM) {
bool CopyPropagation::scanBlock(MachineBasicBlock *B) {
bool Changed = false;
- auto BA = DFG.getFunc().Addr->findBlock(B, DFG);
+ NodeAddr<BlockNode*> BA = DFG.findBlock(B);
for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
if (DFG.IsCode<NodeAttrs::Stmt>(IA)) {
diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h
index d5faca4cd6f4..52f390356b26 100644
--- a/lib/Target/Hexagon/RDFGraph.h
+++ b/lib/Target/Hexagon/RDFGraph.h
@@ -508,7 +508,8 @@ namespace rdf {
static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize,
"NodeBase must be at most NodeAllocator::NodeMemSize bytes");
- typedef std::vector<NodeAddr<NodeBase*>> NodeList;
+// typedef std::vector<NodeAddr<NodeBase*>> NodeList;
+ typedef SmallVector<NodeAddr<NodeBase*>,4> NodeList;
typedef std::set<NodeId> NodeSet;
struct RefNode : public NodeBase {
diff --git a/lib/Target/Hexagon/RDFRegisters.cpp b/lib/Target/Hexagon/RDFRegisters.cpp
index 5c5496a548af..4224ded3418b 100644
--- a/lib/Target/Hexagon/RDFRegisters.cpp
+++ b/lib/Target/Hexagon/RDFRegisters.cpp
@@ -69,6 +69,19 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
for (const MachineOperand &Op : In.operands())
if (Op.isRegMask())
RegMasks.insert(Op.getRegMask());
+
+ MaskInfos.resize(RegMasks.size()+1);
+ for (uint32_t M = 1, NM = RegMasks.size(); M <= NM; ++M) {
+ BitVector PU(TRI.getNumRegUnits());
+ const uint32_t *MB = RegMasks.get(M);
+ for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
+ if (!(MB[i/32] & (1u << (i%32))))
+ continue;
+ for (MCRegUnitIterator U(i, &TRI); U.isValid(); ++U)
+ PU.set(*U);
+ }
+ MaskInfos[M].Units = PU.flip();
+ }
}
RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const {
@@ -201,17 +214,8 @@ bool PhysicalRegisterInfo::aliasMM(RegisterRef RM, RegisterRef RN) const {
bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
- if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
- // XXX SLOW
- const uint32_t *MB = PRI.getRegMaskBits(RR.Reg);
- for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) {
- if (MB[i/32] & (1u << (i%32)))
- continue;
- if (hasAliasOf(RegisterRef(i, LaneBitmask::getAll())))
- return true;
- }
- return false;
- }
+ if (PhysicalRegisterInfo::isRegMaskId(RR.Reg))
+ return Units.anyCommon(PRI.getMaskUnits(RR.Reg));
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
std::pair<uint32_t,LaneBitmask> P = *U;
@@ -224,15 +228,8 @@ bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
- // XXX SLOW
- const uint32_t *MB = PRI.getRegMaskBits(RR.Reg);
- for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) {
- if (MB[i/32] & (1u << (i%32)))
- continue;
- if (!hasCoverOf(RegisterRef(i, LaneBitmask::getAll())))
- return false;
- }
- return true;
+ BitVector T(PRI.getMaskUnits(RR.Reg));
+ return T.reset(Units).none();
}
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
@@ -246,15 +243,7 @@ bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
RegisterAggr &RegisterAggr::insert(RegisterRef RR) {
if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
- BitVector PU(PRI.getTRI().getNumRegUnits()); // Preserved units.
- const uint32_t *MB = PRI.getRegMaskBits(RR.Reg);
- for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) {
- if (!(MB[i/32] & (1u << (i%32))))
- continue;
- for (MCRegUnitIterator U(i, &PRI.getTRI()); U.isValid(); ++U)
- PU.set(*U);
- }
- Units |= PU.flip();
+ Units |= PRI.getMaskUnits(RR.Reg);
return *this;
}
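
MaskInfos precomputes, once per register mask seen in the function, the set of register units the mask clobbers (the preserved units, flipped), so hasAliasOf/hasCoverOf/insert become single bit-set operations instead of re-walking every physical register. A standalone sketch of that precompute-then-query pattern, using std::bitset in place of llvm::BitVector (sizes and names are made up):

  #include <bitset>
  #include <cassert>
  #include <vector>

  constexpr unsigned NumUnits = 64;
  using UnitSet = std::bitset<NumUnits>;

  // Precompute, for each mask, the units it clobbers; here a mask is already
  // given as the set of *preserved* units, so clobbered = ~preserved.
  std::vector<UnitSet> buildMaskInfos(const std::vector<UnitSet> &preserved) {
    std::vector<UnitSet> clobbered;
    clobbered.reserve(preserved.size());
    for (const UnitSet &p : preserved)
      clobbered.push_back(~p);
    return clobbered;
  }

  int main() {
    std::vector<UnitSet> preserved(1);
    preserved[0].set(0).set(1);                    // mask preserves units 0 and 1
    std::vector<UnitSet> maskUnits = buildMaskInfos(preserved);

    UnitSet live;                                  // units currently tracked
    live.set(5);
    // hasAliasOf: any unit in common with the mask's clobbered units?
    assert((live & maskUnits[0]).any());
    // hasCoverOf: every clobbered unit already tracked? (not here)
    assert((maskUnits[0] & ~live).any());
    return 0;
  }
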
diff --git a/lib/Target/Hexagon/RDFRegisters.h b/lib/Target/Hexagon/RDFRegisters.h
index 4b35c85a6b62..314d8b5666d7 100644
--- a/lib/Target/Hexagon/RDFRegisters.h
+++ b/lib/Target/Hexagon/RDFRegisters.h
@@ -51,6 +51,8 @@ namespace rdf {
return F - Map.begin() + 1;
}
+ uint32_t size() const { return Map.size(); }
+
typedef typename std::vector<T>::const_iterator const_iterator;
const_iterator begin() const { return Map.begin(); }
const_iterator end() const { return Map.end(); }
@@ -107,6 +109,9 @@ namespace rdf {
RegisterRef getRefForUnit(uint32_t U) const {
return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask);
}
+ const BitVector &getMaskUnits(RegisterId MaskId) const {
+ return MaskInfos[TargetRegisterInfo::stackSlot2Index(MaskId)].Units;
+ }
const TargetRegisterInfo &getTRI() const { return TRI; }
@@ -118,11 +123,15 @@ namespace rdf {
RegisterId Reg = 0;
LaneBitmask Mask;
};
+ struct MaskInfo {
+ BitVector Units;
+ };
const TargetRegisterInfo &TRI;
+ IndexedSet<const uint32_t*> RegMasks;
std::vector<RegInfo> RegInfos;
std::vector<UnitInfo> UnitInfos;
- IndexedSet<const uint32_t*> RegMasks;
+ std::vector<MaskInfo> MaskInfos;
bool aliasRR(RegisterRef RA, RegisterRef RB) const;
bool aliasRM(RegisterRef RR, RegisterRef RM) const;
@@ -135,7 +144,7 @@ namespace rdf {
: Units(pri.getTRI().getNumRegUnits()), PRI(pri) {}
RegisterAggr(const RegisterAggr &RG) = default;
- bool empty() const { return Units.empty(); }
+ bool empty() const { return Units.none(); }
bool hasAliasOf(RegisterRef RR) const;
bool hasCoverOf(RegisterRef RR) const;
static bool isCoverOf(RegisterRef RA, RegisterRef RB,
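
RegisterAggr::empty() now asks whether any unit bit is set rather than whether the underlying vector has zero size; since the aggregate is always sized to getNumRegUnits() at construction, the old check could never fire. A tiny sketch of the distinction, with std::vector<bool> standing in for llvm::BitVector:

  #include <algorithm>
  #include <cassert>
  #include <vector>

  int main() {
    std::vector<bool> units(64, false);  // sized up front, like RegisterAggr
    // "empty" in the container sense: never true once the vector is sized.
    assert(!units.empty());
    // "none" in the bit-set sense: true until some unit is inserted.
    bool none = std::none_of(units.begin(), units.end(),
                             [](bool b) { return b; });
    assert(none);
    return 0;
  }
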
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index c26b3081dbc3..82e6731ecd78 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
void MSP430MCAsmInfo::anchor() { }
MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) {
- PointerSize = CalleeSaveStackSlotSize = 2;
+ CodePointerSize = CalleeSaveStackSlotSize = 2;
CommentString = ";";
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index ebe3c5784888..11411d997bb3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -23,7 +23,7 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
if ((TheTriple.getArch() == Triple::mips64el) ||
(TheTriple.getArch() == Triple::mips64)) {
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
}
// FIXME: This condition isn't quite right but it's the best we can do until
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index 8b04fcb76920..bf79f0f2ff82 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -3781,6 +3781,80 @@ let Predicates = [HasMSA] in {
ISA_MIPS1_NOT_32R6_64R6;
}
+def vsplati64_imm_eq_63 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
+ APInt Imm;
+ SDNode *BV = N->getOperand(0).getNode();
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63;
+}]>;
+
+def immi32Cst7 : ImmLeaf<i32, [{return isUInt<32>(Imm) && Imm == 7;}]>;
+def immi32Cst15 : ImmLeaf<i32, [{return isUInt<32>(Imm) && Imm == 15;}]>;
+def immi32Cst31 : ImmLeaf<i32, [{return isUInt<32>(Imm) && Imm == 31;}]>;
+
+def vsplati8imm7 : PatFrag<(ops node:$wt),
+ (and node:$wt, (vsplati8 immi32Cst7))>;
+def vsplati16imm15 : PatFrag<(ops node:$wt),
+ (and node:$wt, (vsplati16 immi32Cst15))>;
+def vsplati32imm31 : PatFrag<(ops node:$wt),
+ (and node:$wt, (vsplati32 immi32Cst31))>;
+def vsplati64imm63 : PatFrag<(ops node:$wt),
+ (and node:$wt, vsplati64_imm_eq_63)>;
+
+class MSAShiftPat<SDNode Node, ValueType VT, MSAInst Insn, dag Vec> :
+ MSAPat<(VT (Node VT:$ws, (VT (and VT:$wt, Vec)))),
+ (VT (Insn VT:$ws, VT:$wt))>;
+
+class MSABitPat<SDNode Node, ValueType VT, MSAInst Insn, PatFrag Frag> :
+ MSAPat<(VT (Node VT:$ws, (shl vsplat_imm_eq_1, (Frag VT:$wt)))),
+ (VT (Insn VT:$ws, VT:$wt))>;
+
+multiclass MSAShiftPats<SDNode Node, string Insn> {
+ def : MSAShiftPat<Node, v16i8, !cast<MSAInst>(Insn#_B),
+ (vsplati8 immi32Cst7)>;
+ def : MSAShiftPat<Node, v8i16, !cast<MSAInst>(Insn#_H),
+ (vsplati16 immi32Cst15)>;
+ def : MSAShiftPat<Node, v4i32, !cast<MSAInst>(Insn#_W),
+ (vsplati32 immi32Cst31)>;
+ def : MSAPat<(v2i64 (Node v2i64:$ws, (v2i64 (and v2i64:$wt,
+ vsplati64_imm_eq_63)))),
+ (v2i64 (!cast<MSAInst>(Insn#_D) v2i64:$ws, v2i64:$wt))>;
+}
+
+multiclass MSABitPats<SDNode Node, string Insn> {
+ def : MSABitPat<Node, v16i8, !cast<MSAInst>(Insn#_B), vsplati8imm7>;
+ def : MSABitPat<Node, v8i16, !cast<MSAInst>(Insn#_H), vsplati16imm15>;
+ def : MSABitPat<Node, v4i32, !cast<MSAInst>(Insn#_W), vsplati32imm31>;
+ def : MSAPat<(Node v2i64:$ws, (shl (v2i64 vsplati64_imm_eq_1),
+ (vsplati64imm63 v2i64:$wt))),
+ (v2i64 (!cast<MSAInst>(Insn#_D) v2i64:$ws, v2i64:$wt))>;
+}
+
+defm : MSAShiftPats<shl, "SLL">;
+defm : MSAShiftPats<srl, "SRL">;
+defm : MSAShiftPats<sra, "SRA">;
+defm : MSABitPats<xor, "BNEG">;
+defm : MSABitPats<or, "BSET">;
+
+def : MSAPat<(and v16i8:$ws, (xor (shl vsplat_imm_eq_1,
+ (vsplati8imm7 v16i8:$wt)),
+ immAllOnesV)),
+ (v16i8 (BCLR_B v16i8:$ws, v16i8:$wt))>;
+def : MSAPat<(and v8i16:$ws, (xor (shl vsplat_imm_eq_1,
+ (vsplati16imm15 v8i16:$wt)),
+ immAllOnesV)),
+ (v8i16 (BCLR_H v8i16:$ws, v8i16:$wt))>;
+def : MSAPat<(and v4i32:$ws, (xor (shl vsplat_imm_eq_1,
+ (vsplati32imm31 v4i32:$wt)),
+ immAllOnesV)),
+ (v4i32 (BCLR_W v4i32:$ws, v4i32:$wt))>;
+def : MSAPat<(and v2i64:$ws, (xor (shl (v2i64 vsplati64_imm_eq_1),
+ (vsplati64imm63 v2i64:$wt)),
+ (bitconvert (v4i32 immAllOnesV)))),
+ (v2i64 (BCLR_D v2i64:$ws, v2i64:$wt))>;
+
// Vector extraction with fixed index.
//
// Extracting 32-bit values on MSA32 should always use COPY_S_W rather than
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index e2da8477295b..bf7f079e3105 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1547,11 +1547,24 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
}
+static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ EVT ResTy = Op->getValueType(0);
+ SDValue Vec = Op->getOperand(2);
+ bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
+ MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
+ SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
+ DL, ResEltTy);
+ SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);
+
+ return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
+}
+
static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
EVT ResTy = Op->getValueType(0);
SDLoc DL(Op);
SDValue One = DAG.getConstant(1, DL, ResTy);
- SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2));
+ SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));
return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
DAG.getNOT(DL, Bit, ResTy));
@@ -1687,7 +1700,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
DAG.getNode(ISD::SHL, DL, VecTy, One,
- Op->getOperand(2)));
+ truncateVecElts(Op, DAG)));
}
case Intrinsic::mips_bnegi_b:
case Intrinsic::mips_bnegi_h:
@@ -1723,7 +1736,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
DAG.getNode(ISD::SHL, DL, VecTy, One,
- Op->getOperand(2)));
+ truncateVecElts(Op, DAG)));
}
case Intrinsic::mips_bseti_b:
case Intrinsic::mips_bseti_h:
@@ -2210,7 +2223,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_sll_w:
case Intrinsic::mips_sll_d:
return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
- Op->getOperand(2));
+ truncateVecElts(Op, DAG));
case Intrinsic::mips_slli_b:
case Intrinsic::mips_slli_h:
case Intrinsic::mips_slli_w:
@@ -2240,7 +2253,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_sra_w:
case Intrinsic::mips_sra_d:
return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
- Op->getOperand(2));
+ truncateVecElts(Op, DAG));
case Intrinsic::mips_srai_b:
case Intrinsic::mips_srai_h:
case Intrinsic::mips_srai_w:
@@ -2270,7 +2283,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_srl_w:
case Intrinsic::mips_srl_d:
return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
- Op->getOperand(2));
+ truncateVecElts(Op, DAG));
case Intrinsic::mips_srli_b:
case Intrinsic::mips_srli_h:
case Intrinsic::mips_srli_w:
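The C++ change mirrors the new TableGen patterns earlier in the diff: the lowering now masks each shift-amount element with EltBits - 1 before building the generic shift node, since the generic shifts are undefined for out-of-range amounts while the new patterns only match the masked form. A minimal per-element model of that masking, as a sketch with plain integers rather than the DAG API:

    // Scalar model of truncateVecElts() feeding ISD::SHL, for one v4i32 lane.
    // The AND with (ScalarSizeInBits - 1) keeps the amount in [0, 31], which
    // is the shape the SLL_W/SRL_W/SRA_W patterns above expect to see.
    #include <cstdint>

    uint32_t maskedShl32(uint32_t Elt, uint32_t Amt) {
      Amt &= 31;         // splat constant built by truncateVecElts()
      return Elt << Amt; // generic shift is now well-defined
    }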
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 78bdf4e698d8..bdd0f156c8af 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -27,7 +27,7 @@ void NVPTXMCAsmInfo::anchor() {}
NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple) {
if (TheTriple.getArch() == Triple::nvptx64) {
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
}
CommentString = "//";
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 21e25de80dc7..ba28cd83278b 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -2004,7 +2004,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
uint8_t Byte = Val.getLoBits(8).getZExtValue();
aggBuffer->addBytes(&Byte, 1, 1);
- Val = Val.lshr(8);
+ Val.lshrInPlace(8);
}
return;
}
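The hunk above streams an aggregate constant into the byte buffer least-significant byte first; the only functional change is switching from the copying APInt::lshr to the in-place lshrInPlace. A plain-integer sketch of the same extraction loop (illustrative only, not the APInt API):

    #include <cstdint>
    #include <vector>

    // Emit Val as N little-endian bytes, mirroring the addBytes loop above.
    std::vector<uint8_t> toLittleEndianBytes(uint64_t Val, unsigned N) {
      std::vector<uint8_t> Out;
      for (unsigned I = 0; I < N; ++I) {
        Out.push_back(uint8_t(Val & 0xff)); // low 8 bits, like getLoBits(8)
        Val >>= 8;                          // shift in place, like lshrInPlace(8)
      }
      return Out;
    }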
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index d8fab5b7c01a..d30bf1a56e8a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -20,7 +20,7 @@ void PPCMCAsmInfoDarwin::anchor() { }
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
if (is64Bit) {
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
}
IsLittleEndian = false;
@@ -50,7 +50,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
NeedsLocalForSize = true;
if (is64Bit) {
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
}
IsLittleEndian = T.getArch() == Triple::ppc64le;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 9c72638023bb..125c00295f88 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -2977,10 +2977,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
SDValue Chain = LD->getChain();
SDValue Ops[] = { Base, Offset, Chain };
- SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
- N->getValueType(0), Ops);
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
+ SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
+ N->getValueType(0), Ops);
cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
return;
}
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
index b164df8b595a..d622911e92c4 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
void RISCVMCAsmInfo::anchor() {}
RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
- PointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4;
+ CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4;
CommentString = "#";
AlignmentIsInBytes = false;
SupportsDebugInformation = true;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 3ed09898fb78..21df60237d96 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -28,7 +28,7 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Triple &TheTriple) {
IsLittleEndian = (TheTriple.getArch() == Triple::sparcel);
if (isV9) {
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
}
Data16bitsDirective = "\t.half\t";
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index b17977d41be1..6e00981939b6 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -14,7 +14,7 @@
using namespace llvm;
SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {
- PointerSize = 8;
+ CodePointerSize = 8;
CalleeSaveStackSlotSize = 8;
IsLittleEndian = false;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 84d3c7bed50a..f2fd581f7847 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -829,7 +829,7 @@ bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
return isTruncateFree(FromType, ToType);
}
-bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 7d92a7355877..1c34dc43e8bb 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -454,7 +454,7 @@ public:
MachineBasicBlock *BB) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
bool allowTruncateForTailCall(Type *, Type *) const override;
- bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index 2dcec5263fa1..5f8c78ed1683 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
WebAssemblyMCAsmInfoELF::~WebAssemblyMCAsmInfoELF() {}
WebAssemblyMCAsmInfoELF::WebAssemblyMCAsmInfoELF(const Triple &T) {
- PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
+ CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
// TODO: What should MaxInstLength be?
@@ -55,7 +55,7 @@ WebAssemblyMCAsmInfoELF::WebAssemblyMCAsmInfoELF(const Triple &T) {
WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {}
WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
- PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
+ CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
// TODO: What should MaxInstLength be?
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index a0b008947491..544cd653fd72 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -94,6 +94,8 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
MCFixupKind(WebAssembly::fixup_code_global_index), MI.getLoc()));
++MCNumFixups;
encodeULEB128(uint64_t(MO.getImm()), OS);
+ } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) {
+ encodeSLEB128(int64_t(MO.getImm()), OS);
} else {
encodeULEB128(uint64_t(MO.getImm()), OS);
}
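The new branch encodes OPERAND_SIGNATURE immediates with signed LEB128 instead of unsigned, presumably because block-signature values are signed in the wasm binary format and would be mangled by zero-extension. For reference, a self-contained sketch of signed LEB128, the scheme encodeSLEB128 implements (this is not the LLVM implementation):

    #include <cstdint>
    #include <vector>

    // Signed LEB128: 7 data bits per byte, high bit is the continuation flag,
    // stop once the remaining value is pure sign extension.
    void sleb128(int64_t Value, std::vector<uint8_t> &Out) {
      bool More;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7; // arithmetic shift keeps the sign (assumed here)
        More = !((Value == 0 && !(Byte & 0x40)) ||
                 (Value == -1 && (Byte & 0x40)));
        if (More)
          Byte |= 0x80;
        Out.push_back(Byte);
      } while (More);
    }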
diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index f4c9a4ef6b9c..559165e4c86b 100644
--- a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -54,7 +54,7 @@ FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
void OptimizeReturned::visitCallSite(CallSite CS) {
for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i)
- if (CS.paramHasAttr(0, Attribute::Returned)) {
+ if (CS.paramHasAttr(i, Attribute::Returned)) {
Instruction *Inst = CS.getInstruction();
Value *Arg = CS.getArgOperand(i);
// Ignore constants, globals, undef, etc.
diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt
index 8dd5e8a03e2e..8e8e5fd1eff1 100644
--- a/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -1,5 +1,15 @@
# Tests which are known to fail from the GCC torture test suite.
+# Syntax: Each line has a single test to be marked as a 'known failure' (or
+# 'exclusion'). Known failures are expected to fail, and will cause an error if
+# they pass. (Known failures that do not run at all will not cause an
+# error). The format is
+# <name> <attributes> # comment
+#
+# The attributes in this case represent the different arguments passed to the
+# compiler: 'wasm-s' is for compiling to .s files, and 'wasm-o' for compiling
+# to wasm object files (.o).
+
# Computed gotos are not supported (Cannot select BlockAddress/BRIND)
20040302-1.c
20071210-1.c
@@ -66,3 +76,21 @@ pr41935.c
920728-1.c
pr28865.c
widechar-2.c
+
+# crash: Running pass 'WebAssembly Explicit Locals' on function
+20020107-1.c wasm-o
+20030222-1.c wasm-o
+20071220-1.c wasm-o
+20071220-2.c wasm-o
+990130-1.c wasm-o
+pr38533.c wasm-o
+pr41239.c wasm-o
+pr43385.c wasm-o
+pr43560.c wasm-o
+pr45695.c wasm-o
+pr49279.c wasm-o
+pr49390.c wasm-o
+pr52286.c wasm-o
+
+# fatal error: error in backend: data symbols must have a size set with .size
+921110-1.c wasm-o
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 48a1d8f1330c..9c35a251e480 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -43,7 +43,7 @@ void X86MCAsmInfoDarwin::anchor() { }
X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
bool is64Bit = T.getArch() == Triple::x86_64;
if (is64Bit)
- PointerSize = CalleeSaveStackSlotSize = 8;
+ CodePointerSize = CalleeSaveStackSlotSize = 8;
AssemblerDialect = AsmWriterFlavor;
@@ -92,7 +92,7 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
// For ELF, x86-64 pointer size depends on the ABI.
// For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64
// with the x32 ABI, pointer size remains the default 4.
- PointerSize = (is64Bit && !isX32) ? 8 : 4;
+ CodePointerSize = (is64Bit && !isX32) ? 8 : 4;
// OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
@@ -129,7 +129,7 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
PrivateLabelPrefix = ".L";
- PointerSize = 8;
+ CodePointerSize = 8;
WinEHEncodingType = WinEH::EncodingType::Itanium;
} else {
// 32-bit X86 doesn't use CFI, so this isn't a real encoding type. It's just
@@ -156,7 +156,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
PrivateLabelPrefix = ".L";
- PointerSize = 8;
+ CodePointerSize = 8;
WinEHEncodingType = WinEH::EncodingType::Itanium;
ExceptionsType = ExceptionHandling::WinEH;
} else {
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 78e0bca4158e..8678a13b95d0 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1698,21 +1698,18 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-// NOTE: this only has a subset of the full frame index logic. In
-// particular, the FI < 0 and AfterFPPop logic is handled in
-// X86RegisterInfo::eliminateFrameIndex, but not here. Possibly
-// (probably?) it should be moved into here.
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool IsFixed = MFI.isFixedObjectIndex(FI);
// We can't calculate offset from frame pointer if the stack is realigned,
// so enforce usage of stack/base pointer. The base pointer is used when we
// have dynamic allocas in addition to dynamic realignment.
if (TRI->hasBasePointer(MF))
- FrameReg = TRI->getBaseRegister();
+ FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
else if (TRI->needsStackRealignment(MF))
- FrameReg = TRI->getStackRegister();
+ FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
else
FrameReg = TRI->getFrameRegister(MF);
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index eb5c56ff2ff9..2d788bf0cf99 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1311,8 +1311,9 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
++Cost;
// If the base is a register with multiple uses, this
// transformation may save a mov.
- if ((AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base_Reg.getNode() &&
+ // FIXME: Don't rely on DELETED_NODEs.
+ if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
+ AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
!AM.Base_Reg.getNode()->hasOneUse()) ||
AM.BaseType == X86ISelAddressMode::FrameIndexBase)
--Cost;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7ff483063ec2..b5f29fb400ef 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2742,13 +2742,13 @@ static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
}
-bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
auto Attr =
CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
- CallSite CS(CI);
+ ImmutableCallSite CS(CI);
CallingConv::ID CalleeCC = CS.getCallingConv();
if (!mayTailCallThisCC(CalleeCC))
return false;
@@ -8327,13 +8327,13 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
Zeroable.setBit(i);
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = Cst->getAPIntValue();
- Val = Val.lshr((M % Scale) * ScalarSizeInBits);
+ Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
if (Val == 0)
Zeroable.setBit(i);
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
APInt Val = Cst->getValueAPF().bitcastToAPInt();
- Val = Val.lshr((M % Scale) * ScalarSizeInBits);
+ Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
if (Val == 0)
Zeroable.setBit(i);
@@ -16069,7 +16069,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
unsigned EltBits = EltVT.getSizeInBits();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
APInt MaskElt =
- IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits);
+ IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits);
const fltSemantics &Sem =
EltVT == MVT::f64 ? APFloat::IEEEdouble() :
(IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
@@ -16132,9 +16132,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// The mask constants are automatically splatted for vector types.
unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue SignMask = DAG.getConstantFP(
- APFloat(Sem, APInt::getSignBit(EltSizeInBits)), dl, LogicVT);
+ APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
SDValue MagMask = DAG.getConstantFP(
- APFloat(Sem, ~APInt::getSignBit(EltSizeInBits)), dl, LogicVT);
+ APFloat(Sem, ~APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
// First, clear all bits but the sign bit from the second operand (sign).
if (IsFakeVector)
@@ -17344,10 +17344,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// bits of the inputs before performing those operations.
if (FlipSigns) {
MVT EltVT = VT.getVectorElementType();
- SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl,
+ SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl,
VT);
- Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB);
- Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB);
+ Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM);
+ Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM);
}
SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
@@ -22111,11 +22111,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
}
// i64 vector arithmetic shift can be emulated with the transform:
- // M = lshr(SIGN_BIT, Amt)
+ // M = lshr(SIGN_MASK, Amt)
// ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
Op.getOpcode() == ISD::SRA) {
- SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT);
+ SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
R = DAG.getNode(ISD::XOR, dl, VT, R, M);
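The renamed comment states the transform itself; a small scalar check of the identity may help, as a sketch (assumes 0 <= Amt < 64 and arithmetic right shift on signed values):

    #include <cassert>
    #include <cstdint>

    // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M), M = lshr(SIGN_MASK, Amt)
    int64_t emulatedAshr(uint64_t R, unsigned Amt) {
      uint64_t M = UINT64_C(0x8000000000000000) >> Amt; // sign mask, shifted
      uint64_t L = R >> Amt;                            // logical shift
      return int64_t((L ^ M) - M);                      // re-extend bit 63-Amt
    }

    int main() {
      for (unsigned Amt = 0; Amt != 64; ++Amt)
        assert(emulatedAshr(0xDEADBEEFCAFEBABEULL, Amt) ==
               int64_t(0xDEADBEEFCAFEBABEULL) >> Amt);
    }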
@@ -22647,7 +22647,7 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
- auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
+ auto PTy = cast<PointerType>(LI->getPointerOperandType());
return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;
}
@@ -26722,8 +26722,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// Low bits are known zero.
KnownZero.setLowBits(ShAmt);
} else {
- KnownZero = KnownZero.lshr(ShAmt);
- KnownOne = KnownOne.lshr(ShAmt);
+ KnownZero.lshrInPlace(ShAmt);
+ KnownOne.lshrInPlace(ShAmt);
// High bits are known zero.
KnownZero.setHighBits(ShAmt);
}
@@ -30152,7 +30152,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// x s< 0 ? x^C : 0 --> subus x, C
if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
- OpRHSConst->getAPIntValue().isSignBit())
+ OpRHSConst->getAPIntValue().isSignMask())
// Note that we have to rebuild the RHS constant here to ensure we
// don't rely on particular values of undef lanes.
return DAG.getNode(
@@ -30203,7 +30203,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
- APInt DemandedMask(APInt::getSignBit(BitWidth));
+ APInt DemandedMask(APInt::getSignMask(BitWidth));
APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
DCI.isBeforeLegalizeOps());
@@ -31269,7 +31269,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
else if (X86ISD::VSRAI == Opcode)
Elt = Elt.ashr(ShiftImm);
else
- Elt = Elt.lshr(ShiftImm);
+ Elt.lshrInPlace(ShiftImm);
}
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
}
@@ -32234,8 +32234,8 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V);
if (!BV || !BV->isConstant())
return false;
- for (unsigned i = 0, e = V.getNumOperands(); i < e; i++) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(i));
+ for (SDValue Op : V->ops()) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C)
return false;
uint64_t Val = C->getZExtValue();
@@ -33428,8 +33428,8 @@ static SDValue isFNEG(SDNode *N) {
SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
unsigned EltBits = Op1.getScalarValueSizeInBits();
- auto isSignBitValue = [&](const ConstantFP *C) {
- return C->getValueAPF().bitcastToAPInt() == APInt::getSignBit(EltBits);
+ auto isSignMask = [&](const ConstantFP *C) {
+ return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits);
};
// There is more than one way to represent the same constant on
@@ -33440,21 +33440,21 @@ static SDValue isFNEG(SDNode *N) {
// We check all variants here.
if (Op1.getOpcode() == X86ISD::VBROADCAST) {
if (auto *C = getTargetConstantFromNode(Op1.getOperand(0)))
- if (isSignBitValue(cast<ConstantFP>(C)))
+ if (isSignMask(cast<ConstantFP>(C)))
return Op0;
} else if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1)) {
if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode())
- if (isSignBitValue(CN->getConstantFPValue()))
+ if (isSignMask(CN->getConstantFPValue()))
return Op0;
} else if (auto *C = getTargetConstantFromNode(Op1)) {
if (C->getType()->isVectorTy()) {
if (auto *SplatV = C->getSplatValue())
- if (isSignBitValue(cast<ConstantFP>(SplatV)))
+ if (isSignMask(cast<ConstantFP>(SplatV)))
return Op0;
} else if (auto *FPConst = dyn_cast<ConstantFP>(C))
- if (isSignBitValue(FPConst))
+ if (isSignMask(FPConst))
return Op0;
}
return SDValue();
@@ -34631,7 +34631,7 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
return SDValue();
ShrinkMode Mode;
- if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode))
+ if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16)
return SDValue();
EVT VT = N->getValueType(0);
@@ -35922,14 +35922,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Subtarget.is64Bit()) {
Res.first = X86::RAX;
Res.second = &X86::GR64_ADRegClass;
- } else if (Subtarget.is32Bit()) {
+ } else {
+ assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
+ "Expecting 64, 32 or 16 bit subtarget");
Res.first = X86::EAX;
Res.second = &X86::GR32_ADRegClass;
- } else if (Subtarget.is16Bit()) {
- Res.first = X86::AX;
- Res.second = &X86::GR16_ADRegClass;
- } else {
- llvm_unreachable("Expecting 64, 32 or 16 bit subtarget");
}
return Res;
}
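This hunk, together with the removal of GR16_AD in X86RegisterInfo.td below, drops the 16-bit special case for the inline-asm 'A' constraint and folds 16-bit subtargets into the 32-bit path. For context, a hedged usage sketch of the constraint being classified here, valid on 32-bit x86 where "=A" binds a 64-bit result to the EDX:EAX pair:

    // Illustrative only: classic i386 use of the 'A' constraint.
    unsigned long long readTSC() {
      unsigned long long V;
      __asm__ volatile("rdtsc" : "=A"(V)); // EDX:EAX pair -> V
      return V;
    }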
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index ab4910daca02..190a88335000 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1207,7 +1207,7 @@ namespace llvm {
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
- bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const override;
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index 6cc5e8b63597..fb9315792892 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -67,6 +67,8 @@ private:
MachineFunction &MF) const;
bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
+ bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
const X86Subtarget &STI;
const X86InstrInfo &TII;
@@ -99,6 +101,10 @@ X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI,
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) {
if (RB.getID() == X86::GPRRegBankID) {
+ if (Ty.getSizeInBits() <= 8)
+ return &X86::GR8RegClass;
+ if (Ty.getSizeInBits() == 16)
+ return &X86::GR16RegClass;
if (Ty.getSizeInBits() == 32)
return &X86::GR32RegClass;
if (Ty.getSizeInBits() == 64)
@@ -207,6 +213,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
return true;
if (selectConstant(I, MRI, MF))
return true;
+ if (selectTrunc(I, MRI, MF))
+ return true;
return selectImpl(I);
}
@@ -509,6 +517,59 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+bool X86InstructionSelector::selectTrunc(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ if (I.getOpcode() != TargetOpcode::G_TRUNC)
+ return false;
+
+ const unsigned DstReg = I.getOperand(0).getReg();
+ const unsigned SrcReg = I.getOperand(1).getReg();
+
+ const LLT DstTy = MRI.getType(DstReg);
+ const LLT SrcTy = MRI.getType(SrcReg);
+
+ const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
+ const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
+
+ if (DstRB.getID() != SrcRB.getID()) {
+ DEBUG(dbgs() << "G_TRUNC input/output on different banks\n");
+ return false;
+ }
+
+ if (DstRB.getID() != X86::GPRRegBankID)
+ return false;
+
+ const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
+ if (!DstRC)
+ return false;
+
+ const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
+ if (!SrcRC)
+ return false;
+
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
+ return false;
+ }
+
+ if (DstRC == SrcRC) {
+ // Nothing to be done
+ } else if (DstRC == &X86::GR32RegClass) {
+ I.getOperand(1).setSubReg(X86::sub_32bit);
+ } else if (DstRC == &X86::GR16RegClass) {
+ I.getOperand(1).setSubReg(X86::sub_16bit);
+ } else if (DstRC == &X86::GR8RegClass) {
+ I.getOperand(1).setSubReg(X86::sub_8bit);
+ } else {
+ return false;
+ }
+
+ I.setDesc(TII.get(X86::COPY));
+ return true;
+}
+
InstructionSelector *
llvm::createX86InstructionSelector(X86Subtarget &Subtarget,
X86RegisterBankInfo &RBI) {
diff --git a/lib/Target/X86/X86RegisterBankInfo.cpp b/lib/Target/X86/X86RegisterBankInfo.cpp
index d395c826e6bf..0f8a750a0235 100644
--- a/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -68,6 +68,7 @@ X86GenRegisterBankInfo::PartialMappingIdx
X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) {
if ((Ty.isScalar() && !isFP) || Ty.isPointer()) {
switch (Ty.getSizeInBits()) {
+ case 1:
case 8:
return PMI_GPR8;
case 16:
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 58fa31e94fba..25958f0c3106 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -133,6 +133,11 @@ public:
unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const;
unsigned getStackRegister() const { return StackPtr; }
unsigned getBaseRegister() const { return BasePtr; }
+ /// Returns the physical register used as the frame pointer.
+ /// This always returns the frame pointer register, in contrast to
+ /// getFrameRegister(), which returns the "base pointer" in situations
+ /// involving a stack, frame and base pointer.
+ unsigned getFramePtr() const { return FramePtr; }
// FIXME: Move to FrameInfo
unsigned getSlotSize() const { return SlotSize; }
};
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index c177ba1d52f7..d235d2b40b15 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -438,7 +438,6 @@ def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
(add LOW32_ADDR_ACCESS, RBP)>;
// A class to support the 'A' assembler constraint: [ER]AX then [ER]DX.
-def GR16_AD : RegisterClass<"X86", [i16], 16, (add AX, DX)>;
def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;