Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 5
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp | 49
-rw-r--r--  lib/Analysis/CallGraphSCCPass.cpp | 6
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 74
-rw-r--r--  lib/Analysis/MemorySSA.cpp | 23
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 51
-rw-r--r--  lib/Analysis/TargetLibraryInfo.cpp | 16
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 122
-rw-r--r--  lib/AsmParser/LLParser.cpp | 8
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 4
-rw-r--r--  lib/Bitcode/Reader/ValueList.cpp | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 41
-rw-r--r--  lib/CodeGen/AtomicExpandPass.cpp | 22
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 1
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 27
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 32
-rw-r--r--  lib/CodeGen/GlobalISel/IRTranslator.cpp | 26
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelector.cpp | 2
-rw-r--r--  lib/CodeGen/InterleavedAccessPass.cpp | 19
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 20
-rw-r--r--  lib/CodeGen/LiveRangeShrink.cpp | 211
-rw-r--r--  lib/CodeGen/LowerEmuTLS.cpp | 22
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 45
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 4
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 42
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 8
-rw-r--r--  lib/CodeGen/SafeStack.cpp | 22
-rw-r--r--  lib/CodeGen/SafeStackColoring.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 436
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 49
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 4
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 13
-rw-r--r--  lib/CodeGen/TargetPassConfig.cpp | 21
-rw-r--r--  lib/CodeGen/WinEHPrepare.cpp | 10
-rw-r--r--  lib/DebugInfo/CodeView/CMakeLists.txt | 7
-rw-r--r--  lib/DebugInfo/CodeView/CVTypeDumper.cpp | 61
-rw-r--r--  lib/DebugInfo/CodeView/CVTypeVisitor.cpp | 188
-rw-r--r--  lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp | 229
-rw-r--r--  lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp | 89
-rw-r--r--  lib/DebugInfo/CodeView/SymbolDumper.cpp | 13
-rw-r--r--  lib/DebugInfo/CodeView/TypeDatabase.cpp | 75
-rw-r--r--  lib/DebugInfo/CodeView/TypeDumpVisitor.cpp | 13
-rw-r--r--  lib/DebugInfo/CodeView/TypeIndex.cpp | 27
-rw-r--r--  lib/DebugInfo/CodeView/TypeSerializer.cpp | 33
-rw-r--r--  lib/DebugInfo/CodeView/TypeStreamMerger.cpp | 14
-rw-r--r--  lib/DebugInfo/CodeView/TypeTableCollection.cpp | 83
-rw-r--r--  lib/DebugInfo/DWARF/DWARFContext.cpp | 34
-rw-r--r--  lib/DebugInfo/DWARF/DWARFUnit.cpp | 31
-rw-r--r--  lib/DebugInfo/PDB/Native/DbiStream.cpp | 8
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp | 1
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp | 1
-rw-r--r--  lib/DebugInfo/PDB/Native/TpiStream.cpp | 6
-rw-r--r--  lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp | 2
-rw-r--r--  lib/Demangle/ItaniumDemangle.cpp | 2
-rw-r--r--  lib/IR/Attributes.cpp | 2
-rw-r--r--  lib/IR/AutoUpgrade.cpp | 1
-rw-r--r--  lib/IR/Constants.cpp | 15
-rw-r--r--  lib/IR/ConstantsContext.h | 20
-rw-r--r--  lib/IR/DataLayout.cpp | 2
-rw-r--r--  lib/IR/Function.cpp | 2
-rw-r--r--  lib/IR/IRBuilder.cpp | 5
-rw-r--r--  lib/IR/InlineAsm.cpp | 4
-rw-r--r--  lib/IR/Instruction.cpp | 2
-rw-r--r--  lib/IR/Instructions.cpp | 27
-rw-r--r--  lib/IR/LLVMContextImpl.cpp | 21
-rw-r--r--  lib/IR/PassRegistry.cpp | 2
-rw-r--r--  lib/IR/User.cpp | 10
-rw-r--r--  lib/IR/Value.cpp | 29
-rw-r--r--  lib/IR/ValueTypes.cpp | 2
-rw-r--r--  lib/IR/Verifier.cpp | 6
-rw-r--r--  lib/LTO/ThinLTOCodeGenerator.cpp | 72
-rw-r--r--  lib/Linker/IRMover.cpp | 26
-rw-r--r--  lib/Object/Binary.cpp | 4
-rw-r--r--  lib/Object/CMakeLists.txt | 3
-rw-r--r--  lib/Object/COFFImportFile.cpp | 527
-rw-r--r--  lib/Object/COFFModuleDefinition.cpp | 319
-rw-r--r--  lib/Object/Decompressor.cpp | 5
-rw-r--r--  lib/Object/WindowsResource.cpp | 90
-rw-r--r--  lib/Passes/PassBuilder.cpp | 9
-rw-r--r--  lib/Support/APInt.cpp | 253
-rw-r--r--  lib/Support/BinaryStreamReader.cpp | 18
-rw-r--r--  lib/Support/BinaryStreamRef.cpp | 137
-rw-r--r--  lib/Support/BinaryStreamWriter.cpp | 11
-rw-r--r--  lib/Support/CMakeLists.txt | 1
-rw-r--r--  lib/Support/FormattedStream.cpp | 1
-rw-r--r--  lib/Support/Triple.cpp | 18
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 10
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 5
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h | 6
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 11
-rw-r--r--  lib/Target/AMDGPU/AMDGPU.h | 20
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 15
-rw-r--r--  lib/Target/AMDGPU/AMDGPUCallLowering.h | 3
-rw-r--r--  lib/Target/AMDGPU/AMDGPUCallingConv.td | 50
-rw-r--r--  lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 24
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 120
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 163
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.h | 5
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstrInfo.td | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp | 21
-rw-r--r--  lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 8
-rw-r--r--  lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 17
-rw-r--r--  lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 31
-rw-r--r--  lib/Target/AMDGPU/AMDGPURegisterInfo.cpp | 45
-rw-r--r--  lib/Target/AMDGPU/AMDGPURegisterInfo.h | 3
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 20
-rw-r--r--  lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 25
-rw-r--r--  lib/Target/AMDGPU/BUFInstructions.td | 4
-rw-r--r--  lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 14
-rw-r--r--  lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 2
-rw-r--r--  lib/Target/AMDGPU/GCNRegPressure.cpp | 146
-rw-r--r--  lib/Target/AMDGPU/GCNRegPressure.h | 2
-rw-r--r--  lib/Target/AMDGPU/R600ClauseMergePass.cpp | 6
-rw-r--r--  lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp | 6
-rw-r--r--  lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp | 6
-rw-r--r--  lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp | 6
-rw-r--r--  lib/Target/AMDGPU/R600Packetizer.cpp | 6
-rw-r--r--  lib/Target/AMDGPU/R600RegisterInfo.cpp | 12
-rw-r--r--  lib/Target/AMDGPU/R600RegisterInfo.h | 2
-rw-r--r--  lib/Target/AMDGPU/SIFrameLowering.cpp | 20
-rw-r--r--  lib/Target/AMDGPU/SIFrameLowering.h | 2
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp | 174
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.h | 11
-rw-r--r--  lib/Target/AMDGPU/SIInstrFormats.td | 16
-rw-r--r--  lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 8
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 19
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.h | 5
-rw-r--r--  lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 45
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.cpp | 120
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.h | 14
-rw-r--r--  lib/Target/AMDGPU/SOPInstructions.td | 18
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 13
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 1
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Mips/Mips.h | 12
-rw-r--r--  lib/Target/Mips/Mips16HardFloat.cpp | 17
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 12
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 87
-rw-r--r--  lib/Target/Mips/MipsLongBranch.cpp | 16
-rw-r--r--  lib/Target/Mips/MipsModuleISelDAGToDAG.cpp | 18
-rw-r--r--  lib/Target/Mips/MipsOptimizePICCall.cpp | 6
-rw-r--r--  lib/Target/Mips/MipsOs16.cpp | 4
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 24
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 15
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 8
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 50
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Sparc/DelaySlotFiller.cpp | 2
-rwxr-xr-x  lib/Target/Sparc/LeonPasses.cpp | 17
-rwxr-xr-x  lib/Target/Sparc/LeonPasses.h | 11
-rw-r--r--  lib/Target/Sparc/Sparc.h | 2
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 14
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp | 2
-rw-r--r--  lib/Target/X86/X86.td | 3
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 42
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 7
-rw-r--r--  lib/Target/X86/X86FixupLEAs.cpp | 269
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 162
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td | 132
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 6
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 4
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 13
-rw-r--r--  lib/Target/X86/X86InstrTSX.td | 5
-rw-r--r--  lib/Target/X86/X86InstructionSelector.cpp | 24
-rw-r--r--  lib/Target/X86/X86LegalizerInfo.cpp | 19
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 4
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 7
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 15
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 15
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 36
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 2
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 10
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp | 30
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 79
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 19
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 128
-rw-r--r--  lib/Transforms/InstCombine/InstCombineInternal.h | 54
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 11
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 33
-rw-r--r--  lib/Transforms/Scalar/LoadCombine.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 71
-rw-r--r--  lib/Transforms/Scalar/LoopPredication.cpp | 86
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 1
-rw-r--r--  lib/Transforms/Scalar/NewGVN.cpp | 815
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/StraightLineStrengthReduce.cpp | 2
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 2
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 6
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp | 82
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 2
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp | 4
198 files changed, 5407 insertions, 2424 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index a33c01a0e461..f743cb234c45 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -683,8 +683,11 @@ static bool isIntrinsicCall(ImmutableCallSite CS, Intrinsic::ID IID) {
#ifndef NDEBUG
static const Function *getParent(const Value *V) {
- if (const Instruction *inst = dyn_cast<Instruction>(V))
+ if (const Instruction *inst = dyn_cast<Instruction>(V)) {
+ if (!inst->getParent())
+ return nullptr;
return inst->getParent()->getParent();
+ }
if (const Argument *arg = dyn_cast<Argument>(V))
return arg->getParent();
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index db87b17c1567..267e19adfe4d 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -58,45 +58,12 @@ char BranchProbabilityInfoWrapperPass::ID = 0;
static const uint32_t LBH_TAKEN_WEIGHT = 124;
static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
-/// \brief Unreachable-terminating branch taken weight.
+/// \brief Unreachable-terminating branch taken probability.
///
-/// This is the weight for a branch being taken to a block that terminates
+/// This is the probability for a branch being taken to a block that terminates
/// (eventually) in unreachable. These are predicted as unlikely as possible.
-static const uint32_t UR_TAKEN_WEIGHT = 1;
-
-/// \brief Unreachable-terminating branch not-taken weight.
-///
-/// This is the weight for a branch not being taken toward a block that
-/// terminates (eventually) in unreachable. Such a branch is essentially never
-/// taken. Set the weight to an absurdly high value so that nested loops don't
-/// easily subsume it.
-static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
-
-/// \brief Returns the branch probability for unreachable edge according to
-/// heuristic.
-///
-/// This is the branch probability being taken to a block that terminates
-/// (eventually) in unreachable. These are predicted as unlikely as possible.
-static BranchProbability getUnreachableProbability(uint64_t UnreachableCount) {
- assert(UnreachableCount > 0 && "UnreachableCount must be > 0");
- return BranchProbability::getBranchProbability(
- UR_TAKEN_WEIGHT,
- (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * UnreachableCount);
-}
-
-/// \brief Returns the branch probability for reachable edge according to
-/// heuristic.
-///
-/// This is the branch probability not being taken toward a block that
-/// terminates (eventually) in unreachable. Such a branch is essentially never
-/// taken. Set the weight to an absurdly high value so that nested loops don't
-/// easily subsume it.
-static BranchProbability getReachableProbability(uint64_t ReachableCount) {
- assert(ReachableCount > 0 && "ReachableCount must be > 0");
- return BranchProbability::getBranchProbability(
- UR_NONTAKEN_WEIGHT,
- (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * ReachableCount);
-}
+/// All reachable edges equally share the remaining probability.
+static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1);
/// \brief Weight for a branch taken going into a cold block.
///
@@ -232,8 +199,10 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
return true;
}
- auto UnreachableProb = getUnreachableProbability(UnreachableEdges.size());
- auto ReachableProb = getReachableProbability(ReachableEdges.size());
+ auto UnreachableProb = UR_TAKEN_PROB;
+ auto ReachableProb =
+ (BranchProbability::getOne() - UR_TAKEN_PROB * UnreachableEdges.size()) /
+ ReachableEdges.size();
for (unsigned SuccIdx : UnreachableEdges)
setEdgeProbability(BB, SuccIdx, UnreachableProb);
@@ -319,7 +288,7 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
// If the unreachable heuristic is more strong then we use it for this edge.
if (UnreachableIdxs.size() > 0 && ReachableIdxs.size() > 0) {
auto ToDistribute = BranchProbability::getZero();
- auto UnreachableProb = getUnreachableProbability(UnreachableIdxs.size());
+ auto UnreachableProb = UR_TAKEN_PROB;
for (auto i : UnreachableIdxs)
if (UnreachableProb < BP[i]) {
ToDistribute += BP[i] - UnreachableProb;
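
[Editor's note] As a rough illustration of the arithmetic behind the new scheme: each unreachable edge receives the minimal raw probability, and the reachable edges split whatever remains evenly. A minimal standalone sketch, assuming BranchProbability's fixed-point denominator of 2^31; the names below are illustrative, not the LLVM class:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Fixed-point probabilities with denominator 2^31, as BranchProbability
  // uses internally. UrTakenProb corresponds to getRaw(1).
  const uint64_t Denom = 1ull << 31;
  const uint64_t UrTakenProb = 1;
  unsigned NumUnreachable = 2, NumReachable = 3;

  // Each unreachable edge gets the minimal probability; reachable edges
  // split the remainder evenly, mirroring calcUnreachableHeuristics.
  uint64_t UnreachableProb = UrTakenProb;
  uint64_t ReachableProb =
      (Denom - UrTakenProb * NumUnreachable) / NumReachable;

  printf("unreachable: %llu / 2^31, reachable: %llu / 2^31\n",
         (unsigned long long)UnreachableProb,
         (unsigned long long)ReachableProb);
}
```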
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index 8058e5b1935c..5896e6e0902f 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -477,10 +477,8 @@ bool CGPassManager::runOnModule(Module &M) {
if (DevirtualizedCall)
DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration
<< " times, due to -max-cg-scc-iterations\n");
-
- if (Iteration > MaxSCCIterations)
- MaxSCCIterations = Iteration;
-
+
+ MaxSCCIterations.updateMax(Iteration);
}
Changed |= doFinalization(CG);
return Changed;
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 5652248a60ce..2e72d5aa8269 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -126,8 +126,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
/// Returns the simplified value, or null if no simplification was performed.
static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
- Instruction::BinaryOps OpcodeToExpand, const SimplifyQuery &Q,
- unsigned MaxRecurse) {
+ Instruction::BinaryOps OpcodeToExpand,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
@@ -184,7 +184,8 @@ static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
/// Generic simplifications for associative binary operations.
/// Returns the simpler value, or null if none was found.
static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
- Value *LHS, Value *RHS, const SimplifyQuery &Q,
+ Value *LHS, Value *RHS,
+ const SimplifyQuery &Q,
unsigned MaxRecurse) {
assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
@@ -2260,28 +2261,49 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
if (!OpTy->getScalarType()->isIntegerTy(1))
return nullptr;
- switch (Pred) {
- default:
- break;
- case ICmpInst::ICMP_EQ:
- // X == 1 -> X
- if (match(RHS, m_One()))
- return LHS;
- break;
- case ICmpInst::ICMP_NE:
- // X != 0 -> X
- if (match(RHS, m_Zero()))
+ // A boolean compared to true/false can be simplified in 14 out of the 20
+ // (10 predicates * 2 constants) possible combinations. Cases not handled here
+ // require a 'not' of the LHS, so those must be transformed in InstCombine.
+ if (match(RHS, m_Zero())) {
+ switch (Pred) {
+ case CmpInst::ICMP_NE: // X != 0 -> X
+ case CmpInst::ICMP_UGT: // X >u 0 -> X
+ case CmpInst::ICMP_SLT: // X <s 0 -> X
return LHS;
- break;
- case ICmpInst::ICMP_UGT:
- // X >u 0 -> X
- if (match(RHS, m_Zero()))
+
+ case CmpInst::ICMP_ULT: // X <u 0 -> false
+ case CmpInst::ICMP_SGT: // X >s 0 -> false
+ return getFalse(ITy);
+
+ case CmpInst::ICMP_UGE: // X >=u 0 -> true
+ case CmpInst::ICMP_SLE: // X <=s 0 -> true
+ return getTrue(ITy);
+
+ default: break;
+ }
+ } else if (match(RHS, m_One())) {
+ switch (Pred) {
+ case CmpInst::ICMP_EQ: // X == 1 -> X
+ case CmpInst::ICMP_UGE: // X >=u 1 -> X
+ case CmpInst::ICMP_SLE: // X <=s -1 -> X
return LHS;
+
+ case CmpInst::ICMP_UGT: // X >u 1 -> false
+ case CmpInst::ICMP_SLT: // X <s -1 -> false
+ return getFalse(ITy);
+
+ case CmpInst::ICMP_ULE: // X <=u 1 -> true
+ case CmpInst::ICMP_SGE: // X >=s -1 -> true
+ return getTrue(ITy);
+
+ default: break;
+ }
+ }
+
+ switch (Pred) {
+ default:
break;
case ICmpInst::ICMP_UGE:
- // X >=u 1 -> X
- if (match(RHS, m_One()))
- return LHS;
if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false))
return getTrue(ITy);
break;
@@ -2296,16 +2318,6 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
return getTrue(ITy);
break;
- case ICmpInst::ICMP_SLT:
- // X <s 0 -> X
- if (match(RHS, m_Zero()))
- return LHS;
- break;
- case ICmpInst::ICMP_SLE:
- // X <=s -1 -> X
- if (match(RHS, m_One()))
- return LHS;
- break;
case ICmpInst::ICMP_ULE:
if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
return getTrue(ITy);
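
[Editor's note] The fold table above is easier to see outside LLVM's matcher machinery. A standalone sketch of the same 14-of-20 case analysis, with hypothetical names; this models the switch statements, it is not the LLVM implementation:

```cpp
#include <cstdio>

// i1 X compared against the constants 0 and 1 (-1 for signed predicates).
// Returns "X", "true", "false", or "none" for the six combinations that
// need a 'not' of X and are therefore left to InstCombine.
enum Pred { EQ, NE, UGT, UGE, ULT, ULE, SGT, SGE, SLT, SLE };

const char *foldBoolCmp(Pred P, bool RHSIsOne) {
  if (!RHSIsOne) { // X <pred> 0
    switch (P) {
    case NE: case UGT: case SLT: return "X";
    case ULT: case SGT:          return "false";
    case UGE: case SLE:          return "true";
    default:                     return "none"; // EQ/ULE/SGE need 'not X'
    }
  }
  switch (P) {     // X <pred> 1 (i.e. -1 signed)
  case EQ: case UGE: case SLE:   return "X";
  case UGT: case SLT:            return "false";
  case ULE: case SGE:            return "true";
  default:                       return "none"; // NE/ULT/SGT need 'not X'
  }
}

int main() {
  printf("icmp ne X, 0   -> %s\n", foldBoolCmp(NE, false));
  printf("icmp sge X, -1 -> %s\n", foldBoolCmp(SGE, true));
}
```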
diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index 2480fe44d5c0..e0e04a91410f 100644
--- a/lib/Analysis/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -1799,6 +1799,15 @@ bool MemorySSA::dominates(const MemoryAccess *Dominator,
const static char LiveOnEntryStr[] = "liveOnEntry";
+void MemoryAccess::print(raw_ostream &OS) const {
+ switch (getValueID()) {
+ case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
+ case MemoryDefVal: return static_cast<const MemoryDef *>(this)->print(OS);
+ case MemoryUseVal: return static_cast<const MemoryUse *>(this)->print(OS);
+ }
+ llvm_unreachable("invalid value id");
+}
+
void MemoryDef::print(raw_ostream &OS) const {
MemoryAccess *UO = getDefiningAccess();
@@ -1836,8 +1845,6 @@ void MemoryPhi::print(raw_ostream &OS) const {
OS << ')';
}
-MemoryAccess::~MemoryAccess() {}
-
void MemoryUse::print(raw_ostream &OS) const {
MemoryAccess *UO = getDefiningAccess();
OS << "MemoryUse(";
@@ -2054,3 +2061,15 @@ MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
return StartingAccess;
}
} // namespace llvm
+
+void MemoryPhi::deleteMe(DerivedUser *Self) {
+ delete static_cast<MemoryPhi *>(Self);
+}
+
+void MemoryDef::deleteMe(DerivedUser *Self) {
+ delete static_cast<MemoryDef *>(Self);
+}
+
+void MemoryUse::deleteMe(DerivedUser *Self) {
+ delete static_cast<MemoryUse *>(Self);
+}
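
[Editor's note] The new out-of-line deleteMe functions replace the virtual destructor removed above: deletion is dispatched through a callback each subclass registers with its base, the idiom used by DerivedUser. A minimal sketch of that idiom under assumed, illustrative class names:

```cpp
#include <cstdio>

// The base stores a deletion callback instead of declaring a virtual
// destructor; deleteValue() forwards to it.
class Base {
public:
  using DeleteFn = void (*)(Base *);
  explicit Base(DeleteFn D) : Deleter(D) {}
  void deleteValue() { Deleter(this); } // stands in for 'virtual ~Base'
private:
  DeleteFn Deleter;
};

class Derived : public Base {
public:
  Derived() : Base(deleteMe) {}
  ~Derived() { printf("Derived destroyed\n"); }
private:
  // Each subclass points the callback at its own static deleteMe, just as
  // MemoryPhi/MemoryDef/MemoryUse do above.
  static void deleteMe(Base *Self) { delete static_cast<Derived *>(Self); }
};

int main() {
  Base *B = new Derived();
  B->deleteValue(); // dispatches to Derived::deleteMe
}
```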
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index a746ddfd7a63..78ded8141c08 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -3885,7 +3885,7 @@ public:
: SCEVRewriteVisitor(SE), L(L), Valid(true) {}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
+ if (!SE.isLoopInvariant(Expr, L))
Valid = false;
return Expr;
}
@@ -3919,7 +3919,7 @@ public:
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
// Only allow AddRecExprs for this loop.
- if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant))
+ if (!SE.isLoopInvariant(Expr, L))
Valid = false;
return Expr;
}
@@ -5947,6 +5947,8 @@ ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax())
return SE->getCouldNotCompute();
+ assert((isa<SCEVCouldNotCompute>(getMax()) || isa<SCEVConstant>(getMax())) &&
+ "No point in having a non-constant max backedge taken count!");
return getMax();
}
@@ -5972,7 +5974,11 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
}
ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E)
- : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) {}
+ : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) {
+ assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
+ isa<SCEVConstant>(MaxNotTaken)) &&
+ "No point in having a non-constant max backedge taken count!");
+}
ScalarEvolution::ExitLimit::ExitLimit(
const SCEV *E, const SCEV *M, bool MaxOrZero,
@@ -5981,6 +5987,9 @@ ScalarEvolution::ExitLimit::ExitLimit(
assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
!isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
"Exact is not allowed to be less precise than Max");
+ assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
+ isa<SCEVConstant>(MaxNotTaken)) &&
+ "No point in having a non-constant max backedge taken count!");
for (auto *PredSet : PredSetList)
for (auto *P : *PredSet)
addPredicate(P);
@@ -5989,11 +5998,19 @@ ScalarEvolution::ExitLimit::ExitLimit(
ScalarEvolution::ExitLimit::ExitLimit(
const SCEV *E, const SCEV *M, bool MaxOrZero,
const SmallPtrSetImpl<const SCEVPredicate *> &PredSet)
- : ExitLimit(E, M, MaxOrZero, {&PredSet}) {}
+ : ExitLimit(E, M, MaxOrZero, {&PredSet}) {
+ assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
+ isa<SCEVConstant>(MaxNotTaken)) &&
+ "No point in having a non-constant max backedge taken count!");
+}
ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
bool MaxOrZero)
- : ExitLimit(E, M, MaxOrZero, None) {}
+ : ExitLimit(E, M, MaxOrZero, None) {
+ assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
+ isa<SCEVConstant>(MaxNotTaken)) &&
+ "No point in having a non-constant max backedge taken count!");
+}
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
@@ -6018,6 +6035,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate));
});
+ assert((isa<SCEVCouldNotCompute>(MaxCount) || isa<SCEVConstant>(MaxCount)) &&
+ "No point in having a non-constant max backedge taken count!");
}
/// Invalidate this result and free the ExitNotTakenInfo array.
@@ -6279,7 +6298,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
// to not.
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
!isa<SCEVCouldNotCompute>(BECount))
- MaxBECount = BECount;
+ MaxBECount = getConstant(getUnsignedRange(BECount).getUnsignedMax());
return ExitLimit(BECount, MaxBECount, false,
{&EL0.Predicates, &EL1.Predicates});
@@ -7583,13 +7602,20 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
loopHasNoAbnormalExits(AddRec->getLoop())) {
const SCEV *Exact =
getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
- return ExitLimit(Exact, Exact, false, Predicates);
+ const SCEV *Max =
+ Exact == getCouldNotCompute()
+ ? Exact
+ : getConstant(getUnsignedRange(Exact).getUnsignedMax());
+ return ExitLimit(Exact, Max, false, Predicates);
}
// Solve the general equation.
- const SCEV *E = SolveLinEquationWithOverflow(
- StepC->getAPInt(), getNegativeSCEV(Start), *this);
- return ExitLimit(E, E, false, Predicates);
+ const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(),
+ getNegativeSCEV(Start), *this);
+ const SCEV *M = E == getCouldNotCompute()
+ ? E
+ : getConstant(getUnsignedRange(E).getUnsignedMax());
+ return ExitLimit(E, M, false, Predicates);
}
ScalarEvolution::ExitLimit
@@ -9218,8 +9244,9 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
getConstant(StrideForMaxBECount), false);
}
- if (isa<SCEVCouldNotCompute>(MaxBECount))
- MaxBECount = BECount;
+ if (isa<SCEVCouldNotCompute>(MaxBECount) &&
+ !isa<SCEVCouldNotCompute>(BECount))
+ MaxBECount = getConstant(getUnsignedRange(BECount).getUnsignedMax());
return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates);
}
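
[Editor's note] The recurring assert-and-clamp pattern above enforces one invariant: the stored max backedge-taken count is either "could not compute" or a constant, obtained by taking the unsigned-range maximum of a symbolic exact count. A toy model with hypothetical types, not ScalarEvolution itself:

```cpp
#include <cstdint>
#include <cstdio>
#include <optional>

// A symbolic exact count is represented only by its unsigned range here;
// nullopt plays the role of SCEVCouldNotCompute.
struct UnsignedRange { uint64_t Lo, Hi; };

std::optional<uint64_t> maxFromExact(std::optional<UnsignedRange> ExactRange) {
  if (!ExactRange)
    return std::nullopt; // exact count unknown -> max unknown
  return ExactRange->Hi; // constant: the unsigned-range maximum
}

int main() {
  // An exact count known to lie in [0, 41] yields the constant max 41,
  // mirroring getConstant(getUnsignedRange(BECount).getUnsignedMax()).
  auto M = maxFromExact(UnsignedRange{0, 41});
  if (M)
    printf("max backedge-taken count = %llu\n", (unsigned long long)*M);
}
```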
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 3cf1bbc5daa5..2be5d5caf7c2 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -1518,6 +1519,21 @@ TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(const Triple &T) {
return *Impl;
}
+unsigned TargetLibraryInfoImpl::getTargetWCharSize(const Triple &T) {
+ // See also clang/lib/Basic/Targets.cpp.
+ if (T.isPS4() || T.isOSWindows() || T.isArch16Bit())
+ return 2;
+ if (T.getArch() == Triple::xcore)
+ return 1;
+ return 4;
+}
+
+unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const {
+ if (auto *ShortWChar = cast_or_null<ConstantAsMetadata>(
+ M.getModuleFlag("wchar_size")))
+ return cast<ConstantInt>(ShortWChar->getValue())->getZExtValue();
+ return getTargetWCharSize(Triple(M.getTargetTriple()));
+}
TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass()
: ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) {
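
[Editor's note] The triple-based fallback in getTargetWCharSize can be restated standalone. A sketch keyed on illustrative triple substrings rather than llvm::Triple; note the real code consults the module's "wchar_size" flag first and only then falls back to the triple:

```cpp
#include <cstdio>
#include <string>

// Mirrors the heuristic added above: PS4, Windows, and 16-bit targets use
// a 2-byte wchar_t, XCore a 1-byte one, and everything else 4 bytes.
unsigned targetWCharSize(const std::string &Triple) {
  if (Triple.find("ps4") != std::string::npos ||
      Triple.find("windows") != std::string::npos)
    return 2;
  if (Triple.find("xcore") != std::string::npos)
    return 1;
  return 4;
}

int main() {
  printf("x86_64-pc-windows-msvc: %u\n",
         targetWCharSize("x86_64-pc-windows-msvc"));
  printf("x86_64-unknown-linux:   %u\n",
         targetWCharSize("x86_64-unknown-linux"));
}
```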
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index cba7363a0afa..8e6c1096eec8 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
@@ -2953,14 +2954,16 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
return Ptr;
}
-bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP) {
+bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
+ unsigned CharSize) {
// Make sure the GEP has exactly three arguments.
if (GEP->getNumOperands() != 3)
return false;
- // Make sure the index-ee is a pointer to array of i8.
+ // Make sure the index-ee is a pointer to array of \p CharSize integers.
ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
- if (!AT || !AT->getElementType()->isIntegerTy(8))
+ if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
return false;
// Check to make sure that the first operand of the GEP is an integer and
@@ -2972,11 +2975,9 @@ bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP) {
return true;
}
-/// This function computes the length of a null-terminated C string pointed to
-/// by V. If successful, it returns true and returns the string in Str.
-/// If unsuccessful, it returns false.
-bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
- uint64_t Offset, bool TrimAtNul) {
+bool llvm::getConstantDataArrayInfo(const Value *V,
+ ConstantDataArraySlice &Slice,
+ unsigned ElementSize, uint64_t Offset) {
assert(V);
// Look through bitcast instructions and geps.
@@ -2987,7 +2988,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// The GEP operator should be based on a pointer to string constant, and is
// indexing into the string constant.
- if (!isGEPBasedOnPointerToString(GEP))
+ if (!isGEPBasedOnPointerToString(GEP, ElementSize))
return false;
// If the second index isn't a ConstantInt, then this is a variable index
@@ -2998,8 +2999,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
StartIdx = CI->getZExtValue();
else
return false;
- return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx + Offset,
- TrimAtNul);
+ return getConstantDataArrayInfo(GEP->getOperand(0), Slice, ElementSize,
+ StartIdx + Offset);
}
// The GEP instruction, constant or instruction, must reference a global
@@ -3009,30 +3010,72 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return false;
- // Handle the all-zeros case.
+ const ConstantDataArray *Array;
+ ArrayType *ArrayTy;
if (GV->getInitializer()->isNullValue()) {
- // This is a degenerate case. The initializer is constant zero so the
- // length of the string must be zero.
- Str = "";
- return true;
+ Type *GVTy = GV->getValueType();
+ if ( (ArrayTy = dyn_cast<ArrayType>(GVTy)) ) {
+ // A zeroinitializer for the array; There is no ConstantDataArray.
+ Array = nullptr;
+ } else {
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy);
+ uint64_t Length = SizeInBytes / (ElementSize / 8);
+ if (Length <= Offset)
+ return false;
+
+ Slice.Array = nullptr;
+ Slice.Offset = 0;
+ Slice.Length = Length - Offset;
+ return true;
+ }
+ } else {
+ // This must be a ConstantDataArray.
+ Array = dyn_cast<ConstantDataArray>(GV->getInitializer());
+ if (!Array)
+ return false;
+ ArrayTy = Array->getType();
}
+ if (!ArrayTy->getElementType()->isIntegerTy(ElementSize))
+ return false;
- // This must be a ConstantDataArray.
- const auto *Array = dyn_cast<ConstantDataArray>(GV->getInitializer());
- if (!Array || !Array->isString())
+ uint64_t NumElts = ArrayTy->getArrayNumElements();
+ if (Offset > NumElts)
return false;
- // Get the number of elements in the array.
- uint64_t NumElts = Array->getType()->getArrayNumElements();
+ Slice.Array = Array;
+ Slice.Offset = Offset;
+ Slice.Length = NumElts - Offset;
+ return true;
+}
- // Start out with the entire array in the StringRef.
- Str = Array->getAsString();
+/// This function computes the length of a null-terminated C string pointed to
+/// by V. If successful, it returns true and returns the string in Str.
+/// If unsuccessful, it returns false.
+bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
+ uint64_t Offset, bool TrimAtNul) {
+ ConstantDataArraySlice Slice;
+ if (!getConstantDataArrayInfo(V, Slice, 8, Offset))
+ return false;
- if (Offset > NumElts)
+ if (Slice.Array == nullptr) {
+ if (TrimAtNul) {
+ Str = StringRef();
+ return true;
+ }
+ if (Slice.Length == 1) {
+ Str = StringRef("", 1);
+ return true;
+ }
+ // We cannot instantiate a StringRef as we do not have an appropriate string
+ // of 0s at hand.
return false;
+ }
+ // Start out with the entire array in the StringRef.
+ Str = Slice.Array->getAsString();
// Skip over 'offset' bytes.
- Str = Str.substr(Offset);
+ Str = Str.substr(Slice.Offset);
if (TrimAtNul) {
// Trim off the \0 and anything after it. If the array is not nul
@@ -3050,7 +3093,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
static uint64_t GetStringLengthH(const Value *V,
- SmallPtrSetImpl<const PHINode*> &PHIs) {
+ SmallPtrSetImpl<const PHINode*> &PHIs,
+ unsigned CharSize) {
// Look through noop bitcast instructions.
V = V->stripPointerCasts();
@@ -3063,7 +3107,7 @@ static uint64_t GetStringLengthH(const Value *V,
// If it was new, see if all the input strings are the same length.
uint64_t LenSoFar = ~0ULL;
for (Value *IncValue : PN->incoming_values()) {
- uint64_t Len = GetStringLengthH(IncValue, PHIs);
+ uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
if (Len == 0) return 0; // Unknown length -> unknown.
if (Len == ~0ULL) continue;
@@ -3079,9 +3123,9 @@ static uint64_t GetStringLengthH(const Value *V,
// strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
- uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+ uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
if (Len1 == 0) return 0;
- uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+ uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
if (Len2 == 0) return 0;
if (Len1 == ~0ULL) return Len2;
if (Len2 == ~0ULL) return Len1;
@@ -3090,20 +3134,30 @@ static uint64_t GetStringLengthH(const Value *V,
}
// Otherwise, see if we can read the string.
- StringRef StrData;
- if (!getConstantStringInfo(V, StrData))
+ ConstantDataArraySlice Slice;
+ if (!getConstantDataArrayInfo(V, Slice, CharSize))
return 0;
- return StrData.size()+1;
+ if (Slice.Array == nullptr)
+ return 1;
+
+ // Search for nul characters
+ unsigned NullIndex = 0;
+ for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
+ if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
+ break;
+ }
+
+ return NullIndex + 1;
}
/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
-uint64_t llvm::GetStringLength(const Value *V) {
+uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
if (!V->getType()->isPointerTy()) return 0;
SmallPtrSet<const PHINode*, 32> PHIs;
- uint64_t Len = GetStringLengthH(V, PHIs);
+ uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
// If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
// an empty string as a length.
return Len == ~0ULL ? 1 : Len;
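
[Editor's note] The nul scan that GetStringLengthH now performs over a ConstantDataArraySlice reduces to a simple loop. A toy version with plain vectors standing in for the LLVM types:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Scan the slice [Offset, Offset + Length) for the first zero element and
// return len + 1, matching GetStringLength's convention. If no terminator
// is found within the slice, this returns Length + 1, as the loop above does.
uint64_t stringLengthFromSlice(const std::vector<uint32_t> &Array,
                               uint64_t Offset, uint64_t Length) {
  uint64_t NullIndex = 0;
  for (; NullIndex < Length; ++NullIndex)
    if (Array[Offset + NullIndex] == 0)
      break;
  return NullIndex + 1;
}

int main() {
  // E.g. a wide string "hi" with CharSize > 8, the case the patch enables.
  std::vector<uint32_t> Wide = {'h', 'i', 0, 'x'};
  printf("length = %llu\n",
         (unsigned long long)stringLengthFromSlice(Wide, 0, Wide.size()));
}
```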
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index d7602c83435c..ff1ea44a18a7 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -2502,7 +2502,7 @@ LLParser::PerFunctionState::~PerFunctionState() {
continue;
P.second.first->replaceAllUsesWith(
UndefValue::get(P.second.first->getType()));
- delete P.second.first;
+ P.second.first->deleteValue();
}
for (const auto &P : ForwardRefValIDs) {
@@ -2510,7 +2510,7 @@ LLParser::PerFunctionState::~PerFunctionState() {
continue;
P.second.first->replaceAllUsesWith(
UndefValue::get(P.second.first->getType()));
- delete P.second.first;
+ P.second.first->deleteValue();
}
}
@@ -2642,7 +2642,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
getTypeString(FI->second.first->getType()) + "'");
Sentinel->replaceAllUsesWith(Inst);
- delete Sentinel;
+ Sentinel->deleteValue();
ForwardRefValIDs.erase(FI);
}
@@ -2659,7 +2659,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
getTypeString(FI->second.first->getType()) + "'");
Sentinel->replaceAllUsesWith(Inst);
- delete Sentinel;
+ Sentinel->deleteValue();
ForwardRefVals.erase(FI);
}
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 76298121566a..686c94687669 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -4489,11 +4489,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// Add instruction to end of current BB. If there is no current BB, reject
// this file.
if (!CurBB) {
- delete I;
+ I->deleteValue();
return error("Invalid instruction with no BB");
}
if (!OperandBundles.empty()) {
- delete I;
+ I->deleteValue();
return error("Operand bundles found with no consumer");
}
CurBB->getInstList().push_back(I);
diff --git a/lib/Bitcode/Reader/ValueList.cpp b/lib/Bitcode/Reader/ValueList.cpp
index d1a2a11bbfad..f2a3439a87be 100644
--- a/lib/Bitcode/Reader/ValueList.cpp
+++ b/lib/Bitcode/Reader/ValueList.cpp
@@ -73,7 +73,7 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
// If there was a forward reference to this value, replace it.
Value *PrevVal = OldV;
OldV->replaceAllUsesWith(V);
- delete PrevVal;
+ PrevVal->deleteValue();
}
}
@@ -194,6 +194,6 @@ void BitcodeReaderValueList::resolveConstantForwardRefs() {
// Update all ValueHandles, they should be the only users at this point.
Placeholder->replaceAllUsesWith(RealVal);
- delete Placeholder;
+ Placeholder->deleteValue();
}
}
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 7d945690e9c3..1b39e46ee466 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -13,7 +13,6 @@
#include "CodeViewDebug.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/DebugInfo/CodeView/CVTypeDumper.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/Line.h"
@@ -23,6 +22,7 @@
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
#include "llvm/IR/Constants.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -469,17 +469,21 @@ void CodeViewDebug::emitTypeInformation() {
CommentPrefix += ' ';
}
- TypeDatabase TypeDB(TypeTable.records().size());
- CVTypeDumper CVTD(TypeDB);
- TypeTable.ForEachRecord([&](TypeIndex Index, ArrayRef<uint8_t> Record) {
+ TypeTableCollection Table(TypeTable.records());
+ Optional<TypeIndex> B = Table.getFirst();
+ while (B) {
+ // This will fail if the record data is invalid.
+ CVType Record = Table.getType(*B);
+
if (OS.isVerboseAsm()) {
// Emit a block comment describing the type record for readability.
SmallString<512> CommentBlock;
raw_svector_ostream CommentOS(CommentBlock);
ScopedPrinter SP(CommentOS);
SP.setPrefix(CommentPrefix);
- TypeDumpVisitor TDV(TypeDB, &SP, false);
- Error E = CVTD.dump(Record, TDV);
+ TypeDumpVisitor TDV(Table, &SP, false);
+
+ Error E = codeview::visitTypeRecord(Record, *B, TDV);
if (E) {
logAllUnhandledErrors(std::move(E), errs(), "error: ");
llvm_unreachable("produced malformed type record");
@@ -489,29 +493,10 @@ void CodeViewDebug::emitTypeInformation() {
// newline.
OS.emitRawComment(
CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
- } else {
-#ifndef NDEBUG
- // Assert that the type data is valid even if we aren't dumping
- // comments. The MSVC linker doesn't do much type record validation,
- // so the first link of an invalid type record can succeed while
- // subsequent links will fail with LNK1285.
- BinaryByteStream Stream(Record, llvm::support::little);
- CVTypeArray Types;
- BinaryStreamReader Reader(Stream);
- Error E = Reader.readArray(Types, Reader.getLength());
- if (!E) {
- TypeVisitorCallbacks C;
- E = codeview::visitTypeStream(Types, C);
- }
- if (E) {
- logAllUnhandledErrors(std::move(E), errs(), "error: ");
- llvm_unreachable("produced malformed type record");
- }
-#endif
}
- StringRef S(reinterpret_cast<const char *>(Record.data()), Record.size());
- OS.EmitBinaryData(S);
- });
+ OS.EmitBinaryData(Record.str_data());
+ B = Table.getNext(*B);
+ }
}
namespace {
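
[Editor's note] The rewritten loop iterates the type table through an Optional cursor instead of the removed ForEachRecord callback. A generic sketch of that getFirst/getNext style, with std::optional standing in for llvm::Optional and a plain vector for the type table:

```cpp
#include <cstdio>
#include <optional>
#include <vector>

struct Table {
  std::vector<int> Records;
  // Return the first index, or nullopt for an empty table.
  std::optional<size_t> getFirst() const {
    return Records.empty() ? std::nullopt : std::optional<size_t>(0);
  }
  // Return the index after I, or nullopt past the end.
  std::optional<size_t> getNext(size_t I) const {
    return I + 1 < Records.size() ? std::optional<size_t>(I + 1)
                                  : std::nullopt;
  }
};

int main() {
  Table T{{10, 20, 30}};
  // Visit every record, as the while loop in emitTypeInformation does.
  std::optional<size_t> B = T.getFirst();
  while (B) {
    printf("record %d\n", T.Records[*B]);
    B = T.getNext(*B);
  }
}
```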
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 17e6be05eb42..984973cf3a3b 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -35,12 +36,10 @@ using namespace llvm;
namespace {
class AtomicExpand: public FunctionPass {
- const TargetMachine *TM;
const TargetLowering *TLI;
public:
static char ID; // Pass identification, replacement for typeid
- explicit AtomicExpand(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM), TLI(nullptr) {
+ AtomicExpand() : FunctionPass(ID), TLI(nullptr) {
initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
}
@@ -97,12 +96,10 @@ namespace {
char AtomicExpand::ID = 0;
char &llvm::AtomicExpandID = AtomicExpand::ID;
-INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions",
- false, false)
+INITIALIZE_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions",
+ false, false)
-FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
- return new AtomicExpand(TM);
-}
+FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
namespace {
// Helper functions to retrieve the size of atomic instructions.
@@ -172,9 +169,14 @@ bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
} // end anonymous namespace
bool AtomicExpand::runOnFunction(Function &F) {
- if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand())
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto &TM = TPC->getTM<TargetMachine>();
+ if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
return false;
- TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ TLI = TM.getSubtargetImpl(F)->getTargetLowering();
SmallVector<Instruction *, 1> AtomicInsts;
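
[Editor's note] This TargetMachine-to-TargetPassConfig migration recurs in several passes below (CodeGenPrepare, DwarfEHPrepare, InterleavedAccess): the pass no longer takes a TargetMachine* at construction, but asks for it at run time and bails out gracefully when none is registered, e.g. under plain 'opt'. A toy model of that control flow with illustrative stand-in types:

```cpp
#include <cstdio>

struct TargetMachine { const char *Name; };
struct TargetPassConfig {
  TargetMachine TM;
  TargetMachine &getTM() { return TM; }
};

struct AtomicExpandLike {
  TargetPassConfig *Provider; // null when no codegen pipeline is set up

  bool runOnFunction() {
    // Stands in for getAnalysisIfAvailable<TargetPassConfig>().
    TargetPassConfig *TPC = Provider;
    if (!TPC)
      return false; // no TargetMachine available: do nothing
    printf("expanding atomics for %s\n", TPC->getTM().Name);
    return true;
  }
};

int main() {
  TargetPassConfig TPC{{"x86_64"}};
  AtomicExpandLike WithTM{&TPC}, WithoutTM{nullptr};
  WithTM.runOnFunction();    // has a TM: runs the transform
  WithoutTM.runOnFunction(); // no TM: bails out
}
```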
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 55a27e2fb79e..2b5863aa5800 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -49,7 +49,6 @@ add_llvm_library(LLVMCodeGen
LivePhysRegs.cpp
LiveRangeCalc.cpp
LiveRangeEdit.cpp
- LiveRangeShrink.cpp
LiveRegMatrix.cpp
LiveRegUnits.cpp
LiveStackAnalysis.cpp
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 4d30c6574b12..2a2715beaadc 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -43,7 +43,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveDebugValuesPass(Registry);
initializeLiveDebugVariablesPass(Registry);
initializeLiveIntervalsPass(Registry);
- initializeLiveRangeShrinkPass(Registry);
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index f2e024c5e3bd..3a1a3020a8d4 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
@@ -197,10 +198,11 @@ class TypePromotionTransaction;
public:
static char ID; // Pass identification, replacement for typeid
- explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) {
- initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
- }
+ CodeGenPrepare()
+ : FunctionPass(ID), TM(nullptr), TLI(nullptr), TTI(nullptr),
+ DL(nullptr) {
+ initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F) override;
StringRef getPassName() const override { return "CodeGen Prepare"; }
@@ -255,15 +257,13 @@ class TypePromotionTransaction;
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_TM_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
+INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_TM_PASS_END(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
+INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
-FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
- return new CodeGenPrepare(TM);
-}
+FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
bool CodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
@@ -279,7 +279,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
BPI.reset();
ModifiedDT = false;
- if (TM) {
+ if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+ TM = &TPC->getTM<TargetMachine>();
SubtargetInfo = TM->getSubtargetImpl(F);
TLI = SubtargetInfo->getTargetLowering();
TRI = SubtargetInfo->getRegisterInfo();
@@ -349,7 +350,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Really free removed instructions during promotion.
for (Instruction *I : RemovedInsts)
- delete I;
+ I->deleteValue();
EverMadeChange |= MadeChange;
}
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 38af19a04448..1ef4d8660657 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
@@ -34,8 +35,6 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered");
namespace {
class DwarfEHPrepare : public FunctionPass {
- const TargetMachine *TM;
-
// RewindFunction - _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
@@ -52,15 +51,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in
- // practice.
DwarfEHPrepare()
- : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr),
- TLI(nullptr) {}
-
- DwarfEHPrepare(const TargetMachine *TM)
- : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr),
- TLI(nullptr) {}
+ : FunctionPass(ID), RewindFunction(nullptr), DT(nullptr), TLI(nullptr) {
+ }
bool runOnFunction(Function &Fn) override;
@@ -78,18 +71,18 @@ namespace {
} // end anonymous namespace
char DwarfEHPrepare::ID = 0;
-INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare",
- "Prepare DWARF exceptions", false, false)
+INITIALIZE_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare",
+ "Prepare DWARF exceptions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare",
- "Prepare DWARF exceptions", false, false)
+INITIALIZE_PASS_END(DwarfEHPrepare, "dwarfehprepare",
+ "Prepare DWARF exceptions", false, false)
-FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) {
- return new DwarfEHPrepare(TM);
-}
+FunctionPass *llvm::createDwarfEHPass() { return new DwarfEHPrepare(); }
void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
}
@@ -254,9 +247,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
}
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
- assert(TM && "DWARF EH preparation requires a target machine");
+ const TargetMachine &TM =
+ getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
+ TLI = TM.getSubtargetImpl(Fn)->getTargetLowering();
bool Changed = InsertUnwindResumeCalls(Fn);
DT = nullptr;
TLI = nullptr;
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 77dfb13ac1f2..afc18a15aa1c 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -340,6 +340,15 @@ bool IRTranslator::translateExtractValue(const User &U,
Type *Int32Ty = Type::getInt32Ty(U.getContext());
SmallVector<Value *, 1> Indices;
+ // If Src is a single element ConstantStruct, translate extractvalue
+ // to that element to avoid inserting a cast instruction.
+ if (auto CS = dyn_cast<ConstantStruct>(Src))
+ if (CS->getNumOperands() == 1) {
+ unsigned Res = getOrCreateVReg(*CS->getOperand(0));
+ ValToVReg[&U] = Res;
+ return true;
+ }
+
// getIndexedOffsetInType is designed for GEPs, so the first index is the
// usual array element rather than looking into the actual aggregate.
Indices.push_back(ConstantInt::get(Int32Ty, 0));
@@ -1108,6 +1117,23 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
default:
return false;
}
+ } else if (auto CS = dyn_cast<ConstantStruct>(&C)) {
+ // Return the element if it is a single element ConstantStruct.
+ if (CS->getNumOperands() == 1) {
+ unsigned EltReg = getOrCreateVReg(*CS->getOperand(0));
+ EntryBuilder.buildCast(Reg, EltReg);
+ return true;
+ }
+ SmallVector<unsigned, 4> Ops;
+ SmallVector<uint64_t, 4> Indices;
+ uint64_t Offset = 0;
+ for (unsigned i = 0; i < CS->getNumOperands(); ++i) {
+ unsigned OpReg = getOrCreateVReg(*CS->getOperand(i));
+ Ops.push_back(OpReg);
+ Indices.push_back(Offset);
+ Offset += MRI->getType(OpReg).getSizeInBits();
+ }
+ EntryBuilder.buildSequence(Reg, Ops, Indices);
} else if (auto CV = dyn_cast<ConstantVector>(&C)) {
if (CV->getNumOperands() == 1)
return translate(*CV->getOperand(0), Reg);
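
[Editor's note] The new ConstantStruct path lays struct elements into a sequence at accumulated bit offsets. A toy version of that bookkeeping, with assumed field bit-widths standing in for MRI->getType(OpReg).getSizeInBits():

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // E.g. a struct {i32, i64, i16}: each element lands at the running bit
  // offset, which then advances by that element's size, as in the loop
  // feeding EntryBuilder.buildSequence above.
  std::vector<uint64_t> FieldBits = {32, 64, 16};
  std::vector<uint64_t> Indices;
  uint64_t Offset = 0;
  for (uint64_t Bits : FieldBits) {
    Indices.push_back(Offset);
    Offset += Bits;
  }
  for (size_t i = 0; i < Indices.size(); ++i)
    printf("element %zu at bit offset %llu\n", i,
           (unsigned long long)Indices[i]);
}
```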
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index c67da8629a3b..4c0b06dffd21 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -73,7 +73,7 @@ bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
- if (MO.getReg())
+ if (MO.isReg() && MO.getReg())
if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI))
return *VRegVal == Value;
return false;
diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp
index ec35b3f6449e..bb29db301a95 100644
--- a/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/lib/CodeGen/InterleavedAccessPass.cpp
@@ -45,6 +45,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/Debug.h"
@@ -68,8 +69,7 @@ class InterleavedAccess : public FunctionPass {
public:
static char ID;
- InterleavedAccess(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), DT(nullptr), TM(TM), TLI(nullptr) {
+ InterleavedAccess() : FunctionPass(ID), DT(nullptr), TLI(nullptr) {
initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
}
@@ -84,7 +84,6 @@ public:
private:
DominatorTree *DT;
- const TargetMachine *TM;
const TargetLowering *TLI;
/// The maximum supported interleave factor.
@@ -108,18 +107,18 @@ private:
} // end anonymous namespace.
char InterleavedAccess::ID = 0;
-INITIALIZE_TM_PASS_BEGIN(
+INITIALIZE_PASS_BEGIN(
InterleavedAccess, "interleaved-access",
"Lower interleaved memory accesses to target specific intrinsics", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_TM_PASS_END(
+INITIALIZE_PASS_END(
InterleavedAccess, "interleaved-access",
"Lower interleaved memory accesses to target specific intrinsics", false,
false)
-FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) {
- return new InterleavedAccess(TM);
+FunctionPass *llvm::createInterleavedAccessPass() {
+ return new InterleavedAccess();
}
/// \brief Check if the mask is a DE-interleave mask of the given factor
@@ -426,13 +425,15 @@ bool InterleavedAccess::lowerInterleavedStore(
}
bool InterleavedAccess::runOnFunction(Function &F) {
- if (!TM || !LowerInterleavedAccesses)
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC || !LowerInterleavedAccesses)
return false;
DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ auto &TM = TPC->getTM<TargetMachine>();
+ TLI = TM.getSubtargetImpl(F)->getTargetLowering();
MaxFactor = TLI->getMaxSupportedInterleaveFactor();
// Holds dead instructions that will be erased later.
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 7b1706f0f4ba..be3b258315bb 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -109,26 +109,24 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
AnalysisID StopAfter,
MachineFunctionInitializer *MFInitializer = nullptr) {
- // When in emulated TLS mode, add the LowerEmuTLS pass.
- if (TM->Options.EmulatedTLS)
- PM.add(createLowerEmuTLSPass(TM));
-
- PM.add(createPreISelIntrinsicLoweringPass());
-
- // Add internal analysis passes from the target machine.
- PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
-
// Targets may override createPassConfig to provide a target-specific
// subclass.
TargetPassConfig *PassConfig = TM->createPassConfig(PM);
PassConfig->setStartStopPasses(StartBefore, StartAfter, StopBefore,
StopAfter);
-
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
-
PM.add(PassConfig);
+ // When in emulated TLS mode, add the LowerEmuTLS pass.
+ if (TM->Options.EmulatedTLS)
+ PM.add(createLowerEmuTLSPass());
+
+ PM.add(createPreISelIntrinsicLoweringPass());
+
+ // Add internal analysis passes from the target machine.
+ PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+
PassConfig->addIRPasses();
PassConfig->addCodeGenPrepare();
diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp
deleted file mode 100644
index 00182e2c779f..000000000000
--- a/lib/CodeGen/LiveRangeShrink.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-//===-- LiveRangeShrink.cpp - Move instructions to shrink live range ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-///===---------------------------------------------------------------------===//
-///
-/// \file
-/// This pass moves instructions close to the definition of its operands to
-/// shrink live range of the def instruction. The code motion is limited within
-/// the basic block. The moved instruction should have 1 def, and more than one
-/// uses, all of which are the only use of the def.
-///
-///===---------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Debug.h"
-
-#define DEBUG_TYPE "lrshrink"
-
-STATISTIC(NumInstrsHoistedToShrinkLiveRange,
- "Number of insructions hoisted to shrink live range.");
-
-using namespace llvm;
-
-namespace {
-class LiveRangeShrink : public MachineFunctionPass {
-public:
- static char ID;
-
- LiveRangeShrink() : MachineFunctionPass(ID) {
- initializeLiveRangeShrinkPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- StringRef getPassName() const override { return "Live Range Shrink"; }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-};
-} // End anonymous namespace.
-
-char LiveRangeShrink::ID = 0;
-char &llvm::LiveRangeShrinkID = LiveRangeShrink::ID;
-
-INITIALIZE_PASS(LiveRangeShrink, "lrshrink", "Live Range Shrink Pass", false,
- false)
-namespace {
-typedef DenseMap<MachineInstr *, unsigned> InstOrderMap;
-
-/// Returns \p New if it's dominated by \p Old, otherwise return \p Old.
-/// \p M maintains a map from instruction to its dominating order that satisfies
-/// M[A] > M[B] guarantees that A is dominated by B.
-/// If \p New is not in \p M, return \p Old. Otherwise if \p Old is null, return
-/// \p New.
-MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old,
- const InstOrderMap &M) {
- auto NewIter = M.find(&New);
- if (NewIter == M.end())
- return Old;
- if (Old == nullptr)
- return &New;
- unsigned OrderOld = M.find(Old)->second;
- unsigned OrderNew = NewIter->second;
- if (OrderOld != OrderNew)
- return OrderOld < OrderNew ? &New : Old;
- // OrderOld == OrderNew, we need to iterate down from Old to see if it
- // can reach New, if yes, New is dominated by Old.
- for (MachineInstr *I = Old->getNextNode(); M.find(I)->second == OrderNew;
- I = I->getNextNode())
- if (I == &New)
- return &New;
- return Old;
-}
-
-/// Builds Instruction to its dominating order number map \p M by traversing
-/// from instruction \p Start.
-void BuildInstOrderMap(MachineBasicBlock::iterator Start, InstOrderMap &M) {
- M.clear();
- unsigned i = 0;
- for (MachineInstr &I : make_range(Start, Start->getParent()->end()))
- M[&I] = i++;
-}
-} // end anonymous namespace
-
-bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()))
- return false;
-
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
-
- InstOrderMap IOM;
- // Map from a register to the instruction order (value of IOM) at which
- // the register is last used. When moving an instruction up, we must make
- // sure that none of its defs (including dead defs) cross the last use of
- // the corresponding register.
- DenseMap<unsigned, unsigned> UseMap;
-
- for (MachineBasicBlock &MBB : MF) {
- if (MBB.empty())
- continue;
- bool SawStore = false;
- BuildInstOrderMap(MBB.begin(), IOM);
- UseMap.clear();
-
- for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
- MachineInstr &MI = *Next;
- ++Next;
- if (MI.isPHI() || MI.isDebugValue())
- continue;
- if (MI.mayStore())
- SawStore = true;
-
- unsigned CurrentOrder = IOM[&MI];
- unsigned Barrier = 0;
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || MO.isDebug())
- continue;
- if (MO.isUse())
- UseMap[MO.getReg()] = CurrentOrder;
- else if (MO.isDead() && UseMap.count(MO.getReg()))
- // Barrier is the last instruction where MO gets used. MI must not
- // be moved above Barrier.
- Barrier = std::max(Barrier, UseMap[MO.getReg()]);
- }
-
- if (!MI.isSafeToMove(nullptr, SawStore)) {
- // If MI has side effects, it becomes a barrier for code motion.
- // IOM is rebuilt from the next instruction to prevent later
- // instructions from being moved before this MI.
- if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
- BuildInstOrderMap(Next, IOM);
- SawStore = false;
- }
- continue;
- }
-
- const MachineOperand *DefMO = nullptr;
- MachineInstr *Insert = nullptr;
-
- // Number of live-ranges that will be shortened. We do not count
- // live-ranges that are defined by a COPY as it could be coalesced later.
- unsigned NumEligibleUse = 0;
-
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || MO.isDead() || MO.isDebug())
- continue;
- unsigned Reg = MO.getReg();
- // Do not move the instruction if it def/uses a physical register,
- // unless it is a constant physical register.
- if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
- !MRI.isConstantPhysReg(Reg)) {
- Insert = nullptr;
- break;
- }
- if (MO.isDef()) {
- // Do not move if there is more than one def.
- if (DefMO) {
- Insert = nullptr;
- break;
- }
- DefMO = &MO;
- } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg)) {
- MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
- if (!DefInstr.isCopy())
- NumEligibleUse++;
- Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
- } else {
- Insert = nullptr;
- break;
- }
- }
- // Move the instruction when the number of shrunk live ranges is > 1.
- if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) {
- MachineBasicBlock::iterator I = std::next(Insert->getIterator());
- // Skip all the PHI and debug instructions.
- while (I != MBB.end() && (I->isPHI() || I->isDebugValue()))
- I = std::next(I);
- if (I == MI.getIterator())
- continue;
-
- // Update the dominator order to be the same as the insertion point.
- // We do this to maintain a non-decreasing order without need to update
- // all instruction orders after the insertion point.
- unsigned NewOrder = IOM[&*I];
- IOM[&MI] = NewOrder;
- NumInstrsHoistedToShrinkLiveRange++;
-
- // Find debug values following MI that reference MI's def, so they move too.
- MachineBasicBlock::iterator EndIter = std::next(MI.getIterator());
- if (MI.getOperand(0).isReg())
- for (; EndIter != MBB.end() && EndIter->isDebugValue() &&
- EndIter->getOperand(0).isReg() &&
- EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg();
- ++EndIter, ++Next)
- IOM[&*EndIter] = NewOrder;
- MBB.splice(I, &MBB, MI.getIterator(), EndIter);
- }
- }
- }
- return false;
-}
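The deleted pass leaned on a cheap block-local dominance test: number the instructions of a basic block top-down once, then compare order numbers instead of consulting a dominator tree. A minimal sketch of that idea, assuming LLVM's DenseMap and MachineInstr types:

    // Sketch: block-local dominance by instruction numbering. Within one
    // basic block an earlier instruction dominates a later one, so
    // Order[A] < Order[B] implies A dominates B. Ties can arise only after
    // partial rebuilds, which the deleted pass handled separately.
    DenseMap<MachineInstr *, unsigned> Order;
    unsigned N = 0;
    for (MachineInstr &I : MBB)
      Order[&I] = N++;
    bool Dominates = Order[A] < Order[B];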
diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp
index 6966c8ca4a5f..5fb5b747f471 100644
--- a/lib/CodeGen/LowerEmuTLS.cpp
+++ b/lib/CodeGen/LowerEmuTLS.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
@@ -28,14 +29,12 @@ using namespace llvm;
namespace {
class LowerEmuTLS : public ModulePass {
- const TargetMachine *TM;
public:
static char ID; // Pass identification, replacement for typeid
- explicit LowerEmuTLS() : ModulePass(ID), TM(nullptr) { }
- explicit LowerEmuTLS(const TargetMachine *TM)
- : ModulePass(ID), TM(TM) {
+ LowerEmuTLS() : ModulePass(ID) {
initializeLowerEmuTLSPass(*PassRegistry::getPassRegistry());
}
+
bool runOnModule(Module &M) override;
private:
bool addEmuTlsVar(Module &M, const GlobalVariable *GV);
@@ -55,18 +54,21 @@ private:
char LowerEmuTLS::ID = 0;
INITIALIZE_PASS(LowerEmuTLS, "loweremutls",
- "Add __emutls_[vt]. variables for emultated TLS model",
- false, false)
+ "Add __emutls_[vt]. variables for emultated TLS model", false,
+ false)
-ModulePass *llvm::createLowerEmuTLSPass(const TargetMachine *TM) {
- return new LowerEmuTLS(TM);
-}
+ModulePass *llvm::createLowerEmuTLSPass() { return new LowerEmuTLS(); }
bool LowerEmuTLS::runOnModule(Module &M) {
if (skipModule(M))
return false;
- if (!TM || !TM->Options.EmulatedTLS)
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto &TM = TPC->getTM<TargetMachine>();
+ if (!TM.Options.EmulatedTLS)
return false;
bool Changed = false;
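The hunk above is the template for most of this patch: a pass that used to be handed a TargetMachine pointer in its constructor now pulls it from the TargetPassConfig analysis when it runs. A minimal sketch of the pattern, with MyPass and doTransform as hypothetical names:

    // Sketch (hypothetical pass): fetch the TargetMachine lazily instead of
    // storing a constructor-injected pointer.
    bool MyPass::runOnModule(Module &M) {
      auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
      if (!TPC)
        return false; // Not running inside a codegen pipeline; bail out.
      auto &TM = TPC->getTM<TargetMachine>();
      return doTransform(M, TM); // hypothetical transform helper
    }

SafeStack and StackProtector below use the stricter variant, declaring AU.addRequired<TargetPassConfig>() and calling getAnalysis<TargetPassConfig>(), which turns a missing target configuration into a hard error instead of a silent no-op.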
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 5003115a770f..adfca9a46239 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -245,25 +245,26 @@ public:
/// updating the block -> chain mapping. It does not free or tear down the
/// old chain, but the old chain's block list is no longer valid.
void merge(MachineBasicBlock *BB, BlockChain *Chain) {
- assert(BB);
- assert(!Blocks.empty());
+ assert(BB && "Can't merge a null block.");
+ assert(!Blocks.empty() && "Can't merge into an empty chain.");
// Fast path in case we don't have a chain already.
if (!Chain) {
- assert(!BlockToChain[BB]);
+ assert(!BlockToChain[BB] &&
+ "Passed chain is null, but BB has entry in BlockToChain.");
Blocks.push_back(BB);
BlockToChain[BB] = this;
return;
}
- assert(BB == *Chain->begin());
+ assert(BB == *Chain->begin() && "Passed BB is not head of Chain.");
assert(Chain->begin() != Chain->end());
// Update the incoming blocks to point to this chain, and add them to the
// chain structure.
for (MachineBasicBlock *ChainBB : *Chain) {
Blocks.push_back(ChainBB);
- assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain");
+ assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain.");
BlockToChain[ChainBB] = this;
}
}
@@ -1547,13 +1548,15 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
MachineBasicBlock *BestBlock = nullptr;
BlockFrequency BestFreq;
for (MachineBasicBlock *MBB : WorkList) {
- assert(MBB->isEHPad() == IsEHPad);
+ assert(MBB->isEHPad() == IsEHPad &&
+ "EHPad mismatch between block and work list.");
BlockChain &SuccChain = *BlockToChain[MBB];
if (&SuccChain == &Chain)
continue;
- assert(SuccChain.UnscheduledPredecessors == 0 && "Found CFG-violating block");
+ assert(SuccChain.UnscheduledPredecessors == 0 &&
+ "Found CFG-violating block");
BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
@@ -1621,9 +1624,12 @@ void MachineBlockPlacement::fillWorkLists(
if (!UpdatedPreds.insert(&Chain).second)
return;
- assert(Chain.UnscheduledPredecessors == 0);
+ assert(
+ Chain.UnscheduledPredecessors == 0 &&
+ "Attempting to place block with unscheduled predecessors in worklist.");
for (MachineBasicBlock *ChainBB : Chain) {
- assert(BlockToChain[ChainBB] == &Chain);
+ assert(BlockToChain[ChainBB] == &Chain &&
+ "Block in chain doesn't match BlockToChain map.");
for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
if (BlockFilter && !BlockFilter->count(Pred))
continue;
@@ -2136,8 +2142,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
for (const MachineLoop *InnerLoop : L)
buildLoopChains(*InnerLoop);
- assert(BlockWorkList.empty());
- assert(EHPadWorkList.empty());
+ assert(BlockWorkList.empty() &&
+ "BlockWorkList not empty when starting to build loop chains.");
+ assert(EHPadWorkList.empty() &&
+ "EHPadWorkList not empty when starting to build loop chains.");
BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
// Check if we have profile data for this function. If yes, we will rotate
@@ -2167,7 +2175,8 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// walk the blocks, and use a set to prevent visiting a particular chain
// twice.
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
- assert(LoopChain.UnscheduledPredecessors == 0);
+ assert(LoopChain.UnscheduledPredecessors == 0 &&
+ "LoopChain should not have unscheduled predecessors.");
UpdatedPreds.insert(&LoopChain);
for (const MachineBasicBlock *LoopBB : LoopBlockSet)
@@ -2256,8 +2265,10 @@ void MachineBlockPlacement::buildCFGChains() {
for (MachineLoop *L : *MLI)
buildLoopChains(*L);
- assert(BlockWorkList.empty());
- assert(EHPadWorkList.empty());
+ assert(BlockWorkList.empty() &&
+ "BlockWorkList should be empty before building final chain.");
+ assert(EHPadWorkList.empty() &&
+ "EHPadWorkList should be empty before building final chain.");
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
for (MachineBasicBlock &MBB : *F)
@@ -2651,8 +2662,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
// there are no MachineLoops.
PreferredLoopExit = nullptr;
- assert(BlockToChain.empty());
- assert(ComputedEdges.empty());
+ assert(BlockToChain.empty() &&
+ "BlockToChain map should be empty before starting placement.");
+ assert(ComputedEdges.empty() &&
+ "Computed Edge map should be empty before starting placement.");
unsigned TailDupSize = TailDupPlacementThreshold;
// If only the aggressive threshold is explicitly set, use it.
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 2f0f4297ef5c..6cf751d34e26 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -32,8 +32,8 @@ using namespace llvm;
using namespace llvm::dwarf;
// Handle the Pass registration stuff necessary to use DataLayout's.
-INITIALIZE_TM_PASS(MachineModuleInfo, "machinemoduleinfo",
- "Machine Module Information", false, false)
+INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
+ "Machine Module Information", false, false)
char MachineModuleInfo::ID = 0;
// Out of line virtual method.
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index d2afeae9e70b..aaa253fde494 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -60,19 +60,8 @@ namespace {
class PEI : public MachineFunctionPass {
public:
static char ID;
- explicit PEI(const TargetMachine *TM = nullptr) : MachineFunctionPass(ID) {
+ PEI() : MachineFunctionPass(ID) {
initializePEIPass(*PassRegistry::getPassRegistry());
-
- if (TM && (!TM->usesPhysRegsForPEI())) {
- SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
- unsigned &, unsigned &, const MBBVector &,
- const MBBVector &) {};
- ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger *) {};
- } else {
- SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
- ScavengeFrameVirtualRegs = doScavengeFrameVirtualRegs;
- UsesCalleeSaves = true;
- }
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -140,18 +129,17 @@ WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1),
cl::desc("Warn for stack size bigger than the given"
" number"));
-INITIALIZE_TM_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion",
- false, false)
+INITIALIZE_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", false,
+ false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
-INITIALIZE_TM_PASS_END(PEI, "prologepilog",
- "Prologue/Epilogue Insertion & Frame Finalization",
- false, false)
+INITIALIZE_PASS_END(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion & Frame Finalization", false,
+ false)
-MachineFunctionPass *
-llvm::createPrologEpilogInserterPass(const TargetMachine *TM) {
- return new PEI(TM);
+MachineFunctionPass *llvm::createPrologEpilogInserterPass() {
+ return new PEI();
}
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
@@ -174,6 +162,20 @@ typedef SmallSetVector<int, 8> StackObjSet;
/// frame indexes with appropriate references.
///
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+ if (!SpillCalleeSavedRegisters) {
+ const TargetMachine &TM = Fn.getTarget();
+ if (!TM.usesPhysRegsForPEI()) {
+ SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
+ unsigned &, unsigned &, const MBBVector &,
+ const MBBVector &) {};
+ ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger *) {};
+ } else {
+ SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
+ ScavengeFrameVirtualRegs = doScavengeFrameVirtualRegs;
+ UsesCalleeSaves = true;
+ }
+ }
+
const Function* F = Fn.getFunction();
const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
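PEI gets the same treatment with one twist: the spill/scavenge strategy used to be chosen in the constructor, but without a TargetMachine there that choice must wait until the first runOnMachineFunction call, where Fn.getTarget() becomes available. The null test on SpillCalleeSavedRegisters above acts as the "not yet initialized" flag; in sketch form:

    // Sketch of the deferred-initialization idea used above.
    if (!SpillCalleeSavedRegisters) { // first run: members still unset
      const TargetMachine &TM = Fn.getTarget();
      // ...pick real or no-op implementations based on TM, exactly once.
    }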
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 1803ea2b9249..7b3a5d5c5ff7 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -2666,11 +2666,17 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
// Look for values being erased.
bool DidPrune = false;
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
- if (Vals[i].Resolution != CR_Erase)
+ // We should trigger in all cases in which eraseInstrs() does something.
+ // To match what eraseInstrs() is doing, also handle kept values that are
+ // erasable implicit defs which have already been pruned.
+ if (Vals[i].Resolution != CR_Erase &&
+ (Vals[i].Resolution != CR_Keep || !Vals[i].ErasableImplicitDef ||
+ !Vals[i].Pruned))
continue;
// Check subranges at the point where the copy will be removed.
SlotIndex Def = LR.getValNumInfo(i)->def;
+ // Print message so mismatches with eraseInstrs() can be diagnosed.
+ DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def << '\n');
for (LiveInterval::SubRange &S : LI.subranges()) {
LiveQueryResult Q = S.Query(Def);
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index 08b3d345f689..2771fdbd737a 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
@@ -767,13 +768,12 @@ class SafeStackLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid..
- SafeStackLegacyPass(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {
+ SafeStackLegacyPass() : FunctionPass(ID), TM(nullptr) {
initializeSafeStackLegacyPassPass(*PassRegistry::getPassRegistry());
}
- SafeStackLegacyPass() : SafeStackLegacyPass(nullptr) {}
-
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
}
@@ -793,8 +793,7 @@ public:
return false;
}
- if (!TM)
- report_fatal_error("Target machine is required");
+ TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
if (!TL)
report_fatal_error("TargetLowering instance is required");
@@ -821,11 +820,10 @@ public:
} // anonymous namespace
char SafeStackLegacyPass::ID = 0;
-INITIALIZE_TM_PASS_BEGIN(SafeStackLegacyPass, "safe-stack",
- "Safe Stack instrumentation pass", false, false)
-INITIALIZE_TM_PASS_END(SafeStackLegacyPass, "safe-stack",
- "Safe Stack instrumentation pass", false, false)
+INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, "safe-stack",
+ "Safe Stack instrumentation pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(SafeStackLegacyPass, "safe-stack",
+ "Safe Stack instrumentation pass", false, false)
-FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) {
- return new SafeStackLegacyPass(TM);
-}
+FunctionPass *llvm::createSafeStackPass() { return new SafeStackLegacyPass(); }
diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp
index 09289f947dc9..21f2fa497233 100644
--- a/lib/CodeGen/SafeStackColoring.cpp
+++ b/lib/CodeGen/SafeStackColoring.cpp
@@ -20,9 +20,10 @@ using namespace llvm::safestack;
#define DEBUG_TYPE "safestackcoloring"
+// Disabled by default due to PR32143.
static cl::opt<bool> ClColoring("safe-stack-coloring",
cl::desc("enable safe stack coloring"),
- cl::Hidden, cl::init(true));
+ cl::Hidden, cl::init(false));
const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) {
const auto IT = AllocaNumbering.find(AI);
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0ccee175abfb..5d450e7e078c 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2138,6 +2138,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
if (isNullConstant(CarryIn))
return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
+ // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
+ if (isNullConstant(N0) && isNullConstant(N1)) {
+ EVT VT = N0.getValueType();
+ EVT CarryVT = CarryIn.getValueType();
+ SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
+ AddToWorklist(CarryExt.getNode());
+ return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
+ DAG.getConstant(1, DL, VT)),
+ DAG.getConstant(0, DL, CarryVT));
+ }
+
if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
return Combined;
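The new fold can be sanity-checked by hand: with both addends zero, the wide sum is just the carry-in bit, and no carry-out can ever be produced. A C++ rendering of the arithmetic, with a hypothetical function name:

    // (addcarry 0, 0, C) hand-evaluated: 0 + 0 + (C & 1) <= 1, so the value
    // result is C's low bit and the carry-out is always clear.
    unsigned addcarry_0_0(unsigned C, unsigned &CarryOut) {
      CarryOut = 0; // matches the DAG.getConstant(0, DL, CarryVT) above
      return C & 1; // matches (and (ext/trunc C), 1)
    }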
@@ -12533,49 +12544,54 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// mergeable cases. To prevent this, we prune such stores from the
// front of StoreNodes here.
- unsigned StartIdx = 0;
- while ((StartIdx + 1 < StoreNodes.size()) &&
- StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
- StoreNodes[StartIdx + 1].OffsetFromBase)
- ++StartIdx;
-
- // Bail if we don't have enough candidates to merge.
- if (StartIdx + 1 >= StoreNodes.size())
- return false;
-
- if (StartIdx)
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
+ bool RV = false;
+ while (StoreNodes.size() > 1) {
+ unsigned StartIdx = 0;
+ while ((StartIdx + 1 < StoreNodes.size()) &&
+ StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
+ StoreNodes[StartIdx + 1].OffsetFromBase)
+ ++StartIdx;
- // Scan the memory operations on the chain and find the first non-consecutive
- // store memory address.
- unsigned NumConsecutiveStores = 0;
- int64_t StartAddress = StoreNodes[0].OffsetFromBase;
-
- // Check that the addresses are consecutive starting from the second
- // element in the list of stores.
- for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
- int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
- break;
- NumConsecutiveStores = i + 1;
- }
+ // Bail if we don't have enough candidates to merge.
+ if (StartIdx + 1 >= StoreNodes.size())
+ return RV;
- if (NumConsecutiveStores < 2)
- return false;
+ if (StartIdx)
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
+
+ // Scan the memory operations on the chain and find the first
+ // non-consecutive store memory address.
+ unsigned NumConsecutiveStores = 1;
+ int64_t StartAddress = StoreNodes[0].OffsetFromBase;
+ // Check that the addresses are consecutive starting from the second
+ // element in the list of stores.
+ for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
+ int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ NumConsecutiveStores = i + 1;
+ }
- // Check that we can merge these candidates without causing a cycle
- if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumConsecutiveStores))
- return false;
+ if (NumConsecutiveStores < 2) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumConsecutiveStores);
+ continue;
+ }
+ // Check that we can merge these candidates without causing a cycle
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
+ NumConsecutiveStores)) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumConsecutiveStores);
+ continue;
+ }
- // The node with the lowest store address.
- LLVMContext &Context = *DAG.getContext();
- const DataLayout &DL = DAG.getDataLayout();
+ // The node with the lowest store address.
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
- // Store the constants into memory as one consecutive store.
- if (IsConstantSrc) {
- bool RV = false;
- while (NumConsecutiveStores > 1) {
+ // Store the constants into memory as one consecutive store.
+ if (IsConstantSrc) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
@@ -12635,33 +12651,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
}
// Check if we found a legal integer type that creates a meaningful merge.
- if (LastLegalType < 2 && LastLegalVectorType < 2)
- break;
+ if (LastLegalType < 2 && LastLegalVectorType < 2) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ continue;
+ }
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
true, UseVector);
- if (!Merged)
- break;
+ if (!Merged) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ continue;
+ }
// Remove merged stores for next iteration.
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
RV = true;
- NumConsecutiveStores -= NumElem;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ continue;
}
- return RV;
- }
- // When extracting multiple vector elements, try to store them
- // in one vector store rather than a sequence of scalar stores.
- if (IsExtractVecSrc) {
- bool RV = false;
- while (StoreNodes.size() >= 2) {
+ // When extracting multiple vector elements, try to store them
+ // in one vector store rather than a sequence of scalar stores.
+ if (IsExtractVecSrc) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned NumStoresToMerge = 0;
+ unsigned NumStoresToMerge = 1;
bool IsVec = MemVT.isVector();
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
@@ -12673,7 +12689,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// handles consecutive loads).
if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
- return false;
+ return RV;
// Find a legal type for the vector store.
unsigned Elts = i + 1;
@@ -12693,187 +12709,205 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
bool Merged = MergeStoresOfConstantsOrVecElts(
StoreNodes, MemVT, NumStoresToMerge, false, true);
- if (!Merged)
- break;
+ if (!Merged) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumStoresToMerge);
+ continue;
+ }
// Remove merged stores for next iteration.
StoreNodes.erase(StoreNodes.begin(),
StoreNodes.begin() + NumStoresToMerge);
RV = true;
- NumConsecutiveStores -= NumStoresToMerge;
+ continue;
}
- return RV;
- }
- // Below we handle the case of multiple consecutive stores that
- // come from multiple consecutive loads. We merge them into a single
- // wide load and a single wide store.
+ // Below we handle the case of multiple consecutive stores that
+ // come from multiple consecutive loads. We merge them into a single
+ // wide load and a single wide store.
- // Look for load nodes which are used by the stored values.
- SmallVector<MemOpLink, 8> LoadNodes;
+ // Look for load nodes which are used by the stored values.
+ SmallVector<MemOpLink, 8> LoadNodes;
- // Find acceptable loads. Loads need to have the same chain (token factor),
- // must not be zext, volatile, indexed, and they must be consecutive.
- BaseIndexOffset LdBasePtr;
- for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
- if (!Ld) break;
-
- // Loads must only have one use.
- if (!Ld->hasNUsesOfValue(1, 0))
- break;
+ // Find acceptable loads. Loads need to have the same chain (token factor),
+ // must not be zext, volatile, indexed, and they must be consecutive.
+ BaseIndexOffset LdBasePtr;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+ if (!Ld)
+ break;
- // The memory operands must not be volatile.
- if (Ld->isVolatile() || Ld->isIndexed())
- break;
+ // Loads must only have one use.
+ if (!Ld->hasNUsesOfValue(1, 0))
+ break;
- // We do not accept ext loads.
- if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
- break;
+ // The memory operands must not be volatile.
+ if (Ld->isVolatile() || Ld->isIndexed())
+ break;
- // The stored memory type must be the same.
- if (Ld->getMemoryVT() != MemVT)
- break;
+ // We do not accept ext loads.
+ if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
+ break;
- BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
- // If this is not the first ptr that we check.
- if (LdBasePtr.Base.getNode()) {
- // The base ptr must be the same.
- if (!LdPtr.equalBaseIndex(LdBasePtr))
+ // The stored memory type must be the same.
+ if (Ld->getMemoryVT() != MemVT)
break;
- } else {
- // Check that all other base pointers are the same as this one.
- LdBasePtr = LdPtr;
+
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+ // If this is not the first ptr that we check.
+ if (LdBasePtr.Base.getNode()) {
+ // The base ptr must be the same.
+ if (!LdPtr.equalBaseIndex(LdBasePtr))
+ break;
+ } else {
+ // Check that all other base pointers are the same as this one.
+ LdBasePtr = LdPtr;
+ }
+
+ // We found a potential memory operand to merge.
+ LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset));
}
- // We found a potential memory operand to merge.
- LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset));
- }
+ if (LoadNodes.size() < 2) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ continue;
+ }
- if (LoadNodes.size() < 2)
- return false;
+ // If we have load/store pair instructions and we only have two values,
+ // don't bother.
+ unsigned RequiredAlignment;
+ if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+ St->getAlignment() >= RequiredAlignment) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
+ continue;
+ }
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ unsigned FirstLoadAS = FirstLoad->getAddressSpace();
+ unsigned FirstLoadAlign = FirstLoad->getAlignment();
+
+ // Scan the memory operations on the chain and find the first
+ // non-consecutive load memory address. These variables hold the index in
+ // the store node array.
+ unsigned LastConsecutiveLoad = 0;
+ // This variable refers to the size and not index in the array.
+ unsigned LastLegalVectorType = 0;
+ unsigned LastLegalIntegerType = 0;
+ StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue FirstChain = FirstLoad->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+ // All loads must share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != FirstChain)
+ break;
- // If we have load/store pair instructions and we only have two values,
- // don't bother.
- unsigned RequiredAlignment;
- if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
- St->getAlignment() >= RequiredAlignment)
- return false;
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- unsigned FirstLoadAS = FirstLoad->getAddressSpace();
- unsigned FirstLoadAlign = FirstLoad->getAlignment();
-
- // Scan the memory operations on the chain and find the first non-consecutive
- // load memory address. These variables hold the index in the store node
- // array.
- unsigned LastConsecutiveLoad = 0;
- // This variable refers to the size and not index in the array.
- unsigned LastLegalVectorType = 0;
- unsigned LastLegalIntegerType = 0;
- StartAddress = LoadNodes[0].OffsetFromBase;
- SDValue FirstChain = FirstLoad->getChain();
- for (unsigned i = 1; i < LoadNodes.size(); ++i) {
- // All loads must share the same chain.
- if (LoadNodes[i].MemNode->getChain() != FirstChain)
- break;
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
+ // Find a legal type for the vector store.
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
+ bool IsFastSt, IsFastLd;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) &&
+ IsFastLd) {
+ LastLegalVectorType = i + 1;
+ }
- int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
- break;
- LastConsecutiveLoad = i;
- // Find a legal type for the vector store.
- EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
- bool IsFastSt, IsFastLd;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) && IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) && IsFastLd) {
- LastLegalVectorType = i + 1;
- }
-
- // Find a legal type for the integer store.
- unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
- StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) && IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) && IsFastLd)
- LastLegalIntegerType = i + 1;
- // Or check whether a truncstore and extload is legal.
- else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(Context, StoreTy);
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
+ // Find a legal type for the integer store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) &&
IsFastLd)
- LastLegalIntegerType = i+1;
+ LastLegalIntegerType = i + 1;
+ // Or check whether a truncstore and extload is legal.
+ else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
+ StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
+ StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
+ IsFastLd)
+ LastLegalIntegerType = i + 1;
+ }
}
- }
- // Only use vector types if the vector type is larger than the integer type.
- // If they are the same, use integers.
- bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
- unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
+ // Only use vector types if the vector type is larger than the integer type.
+ // If they are the same, use integers.
+ bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
+ unsigned LastLegalType =
+ std::max(LastLegalVectorType, LastLegalIntegerType);
- // We add +1 here because the LastXXX variables refer to location while
- // the NumElem refers to array/index size.
- unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
- NumElem = std::min(LastLegalType, NumElem);
+ // We add +1 here because the LastXXX variables refer to a location, while
+ // NumElem refers to an array/index size.
+ unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
+ NumElem = std::min(LastLegalType, NumElem);
- if (NumElem < 2)
- return false;
+ if (NumElem < 2) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ continue;
+ }
- // Find if it is better to use vectors or integers to load and store
- // to memory.
- EVT JointMemOpVT;
- if (UseVectorTy) {
- JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
- } else {
- unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
- JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
- }
+ // Find if it is better to use vectors or integers to load and store
+ // to memory.
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
+ } else {
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+ }
- SDLoc LoadDL(LoadNodes[0].MemNode);
- SDLoc StoreDL(StoreNodes[0].MemNode);
+ SDLoc LoadDL(LoadNodes[0].MemNode);
+ SDLoc StoreDL(StoreNodes[0].MemNode);
- // The merged loads are required to have the same incoming chain, so
- // using the first's chain is acceptable.
- SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
- FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoadAlign);
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
+ SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+ FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoadAlign);
- SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+ SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
- AddToWorklist(NewStoreChain.getNode());
+ AddToWorklist(NewStoreChain.getNode());
- SDValue NewStore =
- DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign);
+ SDValue NewStore = DAG.getStore(
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstStoreAlign);
- // Transfer chain users from old loads to the new load.
- for (unsigned i = 0; i < NumElem; ++i) {
- LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
- SDValue(NewLoad.getNode(), 1));
- }
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+ }
- // Replace all of the stores with the new store.
- for (unsigned i = 0; i < NumElem; ++i)
- CombineTo(StoreNodes[i].MemNode, NewStore);
- return true;
+ // Replace all of the stores with the new store.
+ for (unsigned i = 0; i < NumElem; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+ RV = true;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ continue;
+ }
+ return RV;
}
SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
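The MergeConsecutiveStores rewrite above is easier to read as a skeleton. What used to be a single attempt with early "return false" exits becomes a loop that repeatedly consumes the front of StoreNodes, erasing whatever it just processed, merged or rejected, and retrying on the remainder. A condensed sketch with hypothetical helper names; the real code inlines each step:

    bool RV = false;
    while (StoreNodes.size() > 1) {
      alignToConsecutiveRun(StoreNodes); // drop unmergeable front entries
      if (StoreNodes.size() < 2)
        return RV;
      unsigned Run = consecutiveRunLength(StoreNodes);
      if (Run < 2 || introducesCycle(StoreNodes, Run)) {
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + Run);
        continue; // retry on the remainder
      }
      unsigned Merged = tryMergeRun(StoreNodes, Run); // 0 if nothing merged
      RV |= Merged != 0;
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + std::max(Merged, 1u));
    }
    return RV; // true iff at least one merge happened

The practical difference is that one unmergeable run no longer aborts the whole combine; later runs in the same candidate list still get a chance.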
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 057badcd6b74..16c1f78f1b35 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4685,9 +4685,10 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
/// used when a memcpy is turned into a memset when the source is a constant
/// string ptr.
static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
- const TargetLowering &TLI, StringRef Str) {
+ const TargetLowering &TLI,
+ const ConstantDataArraySlice &Slice) {
// Handle vector with all elements zero.
- if (Str.empty()) {
+ if (Slice.Array == nullptr) {
if (VT.isInteger())
return DAG.getConstant(0, dl, VT);
else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
@@ -4706,15 +4707,15 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
assert(!VT.isVector() && "Can't handle vector type here!");
unsigned NumVTBits = VT.getSizeInBits();
unsigned NumVTBytes = NumVTBits / 8;
- unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Slice.Length));
APInt Val(NumVTBits, 0);
if (DAG.getDataLayout().isLittleEndian()) {
for (unsigned i = 0; i != NumBytes; ++i)
- Val |= (uint64_t)(unsigned char)Str[i] << i*8;
+ Val |= (uint64_t)(unsigned char)Slice[i] << i*8;
} else {
for (unsigned i = 0; i != NumBytes; ++i)
- Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
+ Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8;
}
// If the "cost" of materializing the integer immediate is less than the cost
@@ -4731,9 +4732,8 @@ SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset,
return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT));
}
-/// isMemSrcFromString - Returns true if memcpy source is a string constant.
-///
-static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
+/// Returns true if memcpy source is constant data.
+static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
uint64_t SrcDelta = 0;
GlobalAddressSDNode *G = nullptr;
if (Src.getOpcode() == ISD::GlobalAddress)
@@ -4747,8 +4747,8 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
if (!G)
return false;
- return getConstantStringInfo(G->getGlobal(), Str,
- SrcDelta + G->getOffset(), false);
+ return getConstantDataArrayInfo(G->getGlobal(), Slice, 8,
+ SrcDelta + G->getOffset());
}
/// Determines the optimal series of memory ops to replace the memset / memcpy.
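isMemSrcFromConstant generalizes the old string-only path: instead of a StringRef, it fills in a ConstantDataArraySlice describing a window into constant data, where a null Array preserves the old "empty string means all zeros" special case. The shape of the slice as this diff uses it, a sketch of the interface rather than the full definition from ValueTracking:

    // Fields and operations as exercised by this patch.
    struct ConstantDataArraySlice {
      const ConstantDataArray *Array; // nullptr => region reads as zeros
      uint64_t Offset;                // first interesting element in Array
      uint64_t Length;                // number of elements from Offset
      // Slice[i] reads element Offset + i; move(Delta) advances Offset and
      // shrinks Length by Delta.
    };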
@@ -4891,15 +4891,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
- StringRef Str;
- bool CopyFromStr = isMemSrcFromString(Src, Str);
- bool isZeroStr = CopyFromStr && Str.empty();
+ ConstantDataArraySlice Slice;
+ bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
+ bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
- (isZeroStr ? 0 : SrcAlign),
- false, false, CopyFromStr, true,
+ (isZeroConstant ? 0 : SrcAlign),
+ false, false, CopyFromConstant, true,
DstPtrInfo.getAddrSpace(),
SrcPtrInfo.getAddrSpace(),
DAG, TLI))
@@ -4943,18 +4943,29 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
DstOff -= VTSize - Size;
}
- if (CopyFromStr &&
- (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
+ if (CopyFromConstant &&
+ (isZeroConstant || (VT.isInteger() && !VT.isVector()))) {
// It's unlikely a store of a vector immediate can be done in a single
// instruction. It would require a load from a constantpool first.
// We only handle zero vectors here.
// FIXME: Handle other cases where store of vector immediate is done in
// a single instruction.
- Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
+ ConstantDataArraySlice SubSlice;
+ if (SrcOff < Slice.Length) {
+ SubSlice = Slice;
+ SubSlice.move(SrcOff);
+ } else {
+ // This is an out-of-bounds access and hence UB. Pretend we read zero.
+ SubSlice.Array = nullptr;
+ SubSlice.Offset = 0;
+ SubSlice.Length = VTSize;
+ }
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
if (Value.getNode())
Store = DAG.getStore(Chain, dl, Value,
DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags);
+ DstPtrInfo.getWithOffset(DstOff), Align,
+ MMOFlags);
}
if (!Store.getNode()) {
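One concrete case of the sub-slice logic above, using the slice layout sketched earlier: copying 16 constant bytes as two 8-byte stores.

    // First store:  SrcOff = 0 -> SubSlice = {Array, Offset = 0, Length = 16}
    // Second store: SrcOff = 8 -> SubSlice = Slice; SubSlice.move(8)
    //                             == {Array, Offset = 8, Length = 8}
    // SrcOff >= Slice.Length would be an out-of-bounds read, so the code
    // above substitutes an all-zero slice (Array == nullptr) instead.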
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index c0a5041b1395..1c66649cae01 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -110,8 +110,8 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
Builder.FuncInfo.StatepointStackSlots.size() &&
"Broken invariant");
- StatepointMaxSlotsRequired = std::max<unsigned long>(
- StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size());
+ StatepointMaxSlotsRequired.updateMax(
+ Builder.FuncInfo.StatepointStackSlots.size());
return SpillSlot;
}
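Statistic::updateMax keeps the running maximum inside the counter itself, so the call site no longer needs the read-modify-write with its std::max<unsigned long> cast. A usage sketch with a hypothetical counter name:

    STATISTIC(MaxSlotsUsed, "Maximum number of slots in use"); // hypothetical
    void noteSlots(unsigned N) {
      MaxSlotsUsed.updateMax(N); // was: MaxSlotsUsed = std::max<...>(MaxSlotsUsed, N)
    }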
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index a8aafe78748d..5da77264261b 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -58,12 +58,13 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
cl::init(true), cl::Hidden);
char StackProtector::ID = 0;
-INITIALIZE_TM_PASS(StackProtector, "stack-protector", "Insert stack protectors",
- false, true)
+INITIALIZE_PASS_BEGIN(StackProtector, "stack-protector",
+ "Insert stack protectors", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(StackProtector, "stack-protector",
+ "Insert stack protectors", false, true)
-FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) {
- return new StackProtector(TM);
-}
+FunctionPass *llvm::createStackProtectorPass() { return new StackProtector(); }
StackProtector::SSPLayoutKind
StackProtector::getSSPLayout(const AllocaInst *AI) const {
@@ -97,6 +98,8 @@ bool StackProtector::runOnFunction(Function &Fn) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ Trip = TM->getTargetTriple();
TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
HasPrologue = false;
HasIRCheck = false;
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 9724cb074584..83348058eca9 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -315,7 +315,9 @@ TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
TargetPassConfig::TargetPassConfig()
: ImmutablePass(ID), PM(nullptr) {
- llvm_unreachable("TargetPassConfig should not be constructed on-the-fly");
+ report_fatal_error("Trying to construct TargetPassConfig without a target "
+ "machine. Scheduling a CodeGen pass without a target "
+ "triple set?");
}
// Helper to verify the analysis is really immutable.
@@ -514,14 +516,14 @@ void TargetPassConfig::addPassesToHandleExceptions() {
LLVM_FALLTHROUGH;
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
- addPass(createDwarfEHPass(TM));
+ addPass(createDwarfEHPass());
break;
case ExceptionHandling::WinEH:
// We support using both GCC-style and MSVC-style exceptions on Windows, so
// add both preparation passes. Each pass will only actually run if it
// recognizes the personality function.
- addPass(createWinEHPass(TM));
- addPass(createDwarfEHPass(TM));
+ addPass(createWinEHPass());
+ addPass(createDwarfEHPass());
break;
case ExceptionHandling::None:
addPass(createLowerInvokePass());
@@ -536,7 +538,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
/// before exception handling preparation passes.
void TargetPassConfig::addCodeGenPrepare() {
if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
- addPass(createCodeGenPreparePass(TM));
+ addPass(createCodeGenPreparePass());
addPass(createRewriteSymbolsPass());
}
@@ -551,8 +553,8 @@ void TargetPassConfig::addISelPrepare() {
// Add both the safe stack and the stack protection passes: each of them will
// only protect functions that have corresponding attributes.
- addPass(createSafeStackPass(TM));
- addPass(createStackProtectorPass(TM));
+ addPass(createSafeStackPass());
+ addPass(createStackProtectorPass());
if (PrintISelInput)
addPass(createPrintFunctionPass(
@@ -623,9 +625,6 @@ void TargetPassConfig::addMachinePasses() {
addPass(&LocalStackSlotAllocationID, false);
}
- if (getOptLevel() != CodeGenOpt::None)
- addPass(&LiveRangeShrinkID);
-
// Run pre-ra passes.
addPreRegAlloc();
@@ -650,7 +649,7 @@ void TargetPassConfig::addMachinePasses() {
// Prolog/Epilog inserter needs a TargetMachine to instantiate. But only
// do so if it hasn't been disabled, substituted, or overridden.
if (!isPassSubstitutedOrOverridden(&PrologEpilogCodeInserterID))
- addPass(createPrologEpilogInserterPass(TM));
+ addPass(createPrologEpilogInserterPass());
/// Add passes that optimize machine instructions after register allocation.
if (getOptLevel() != CodeGenOpt::None)
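The knock-on effect for pipeline construction: every create*Pass() call in this file drops its TargetMachine argument. Out-of-tree code that schedules these passes directly would update the same way; a sketch:

    legacy::PassManager PM; // pipeline assumed set up by a target machine
    PM.add(createSafeStackPass());       // was: createSafeStackPass(TM)
    PM.add(createStackProtectorPass());  // was: createStackProtectorPass(TM)
    // Each pass now recovers the TargetMachine through the TargetPassConfig
    // analysis at run time, so it must run inside a codegen pipeline.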
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index ae07e8b2fa03..a632b40c20f5 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -54,7 +54,7 @@ namespace {
class WinEHPrepare : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
- WinEHPrepare(const TargetMachine *TM = nullptr) : FunctionPass(ID) {}
+ WinEHPrepare() : FunctionPass(ID) {}
bool runOnFunction(Function &Fn) override;
@@ -94,12 +94,10 @@ private:
} // end anonymous namespace
char WinEHPrepare::ID = 0;
-INITIALIZE_TM_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions",
- false, false)
+INITIALIZE_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions",
+ false, false)
-FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) {
- return new WinEHPrepare(TM);
-}
+FunctionPass *llvm::createWinEHPass() { return new WinEHPrepare(); }
bool WinEHPrepare::runOnFunction(Function &Fn) {
if (!Fn.hasPersonalityFn())
diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt
index 8d9353ae5f5e..556ebf78622f 100644
--- a/lib/DebugInfo/CodeView/CMakeLists.txt
+++ b/lib/DebugInfo/CodeView/CMakeLists.txt
@@ -2,10 +2,10 @@ add_llvm_library(LLVMDebugInfoCodeView
CodeViewError.cpp
CodeViewRecordIO.cpp
CVSymbolVisitor.cpp
- CVTypeDumper.cpp
CVTypeVisitor.cpp
EnumTables.cpp
Formatters.cpp
+ LazyRandomTypeCollection.cpp
Line.cpp
ModuleDebugFileChecksumFragment.cpp
ModuleDebugFragment.cpp
@@ -13,7 +13,6 @@ add_llvm_library(LLVMDebugInfoCodeView
ModuleDebugFragmentVisitor.cpp
ModuleDebugInlineeLinesFragment.cpp
ModuleDebugLineFragment.cpp
- RandomAccessTypeVisitor.cpp
RecordSerialization.cpp
StringTable.cpp
SymbolRecordMapping.cpp
@@ -22,10 +21,12 @@ add_llvm_library(LLVMDebugInfoCodeView
TypeDatabase.cpp
TypeDatabaseVisitor.cpp
TypeDumpVisitor.cpp
+ TypeIndex.cpp
TypeRecordMapping.cpp
TypeSerializer.cpp
TypeStreamMerger.cpp
-
+ TypeTableCollection.cpp
+
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/CodeView
)
diff --git a/lib/DebugInfo/CodeView/CVTypeDumper.cpp b/lib/DebugInfo/CodeView/CVTypeDumper.cpp
deleted file mode 100644
index 02e1682f76e7..000000000000
--- a/lib/DebugInfo/CodeView/CVTypeDumper.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-//===-- CVTypeDumper.cpp - CodeView type info dumper ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/CVTypeDumper.h"
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
-#include "llvm/Support/BinaryByteStream.h"
-
-using namespace llvm;
-using namespace llvm::codeview;
-
-Error CVTypeDumper::dump(const CVType &Record, TypeVisitorCallbacks &Dumper) {
- TypeDatabaseVisitor DBV(TypeDB);
- TypeVisitorCallbackPipeline Pipeline;
- Pipeline.addCallbackToPipeline(DBV);
- Pipeline.addCallbackToPipeline(Dumper);
-
- CVType RecordCopy = Record;
- return codeview::visitTypeRecord(RecordCopy, Pipeline, VDS_BytesPresent,
- Handler);
-}
-
-Error CVTypeDumper::dump(const CVTypeArray &Types,
- TypeVisitorCallbacks &Dumper) {
- TypeDatabaseVisitor DBV(TypeDB);
- TypeVisitorCallbackPipeline Pipeline;
- Pipeline.addCallbackToPipeline(DBV);
- Pipeline.addCallbackToPipeline(Dumper);
-
- return codeview::visitTypeStream(Types, Pipeline, Handler);
-}
-
-Error CVTypeDumper::dump(ArrayRef<uint8_t> Data, TypeVisitorCallbacks &Dumper) {
- BinaryByteStream Stream(Data, llvm::support::little);
- CVTypeArray Types;
- BinaryStreamReader Reader(Stream);
- if (auto EC = Reader.readArray(Types, Reader.getLength()))
- return EC;
-
- return dump(Types, Dumper);
-}
-
-void CVTypeDumper::printTypeIndex(ScopedPrinter &Printer, StringRef FieldName,
- TypeIndex TI, TypeDatabase &DB) {
- StringRef TypeName;
- if (!TI.isNoneType())
- TypeName = DB.getTypeName(TI);
- if (!TypeName.empty())
- Printer.printHex(FieldName, TypeName, TI.getIndex());
- else
- Printer.printHex(FieldName, TI.getIndex());
-}
diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index 0f7f5f667790..f95c3e79388e 100644
--- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -9,7 +9,9 @@
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
@@ -22,8 +24,6 @@
using namespace llvm;
using namespace llvm::codeview;
-CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks)
- : Callbacks(Callbacks) {}
template <typename T>
static Error visitKnownRecord(CVType &Record, TypeVisitorCallbacks &Callbacks) {
@@ -66,6 +66,67 @@ static Expected<TypeServer2Record> deserializeTypeServerRecord(CVType &Record) {
return R;
}
+static Error visitMemberRecord(CVMemberRecord &Record,
+ TypeVisitorCallbacks &Callbacks) {
+ if (auto EC = Callbacks.visitMemberBegin(Record))
+ return EC;
+
+ switch (Record.Kind) {
+ default:
+ if (auto EC = Callbacks.visitUnknownMember(Record))
+ return EC;
+ break;
+#define MEMBER_RECORD(EnumName, EnumVal, Name) \
+ case EnumName: { \
+ if (auto EC = visitKnownMember<Name##Record>(Record, Callbacks)) \
+ return EC; \
+ break; \
+ }
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \
+ MEMBER_RECORD(EnumVal, EnumVal, AliasName)
+#define TYPE_RECORD(EnumName, EnumVal, Name)
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/TypeRecords.def"
+ }
+
+ if (auto EC = Callbacks.visitMemberEnd(Record))
+ return EC;
+
+ return Error::success();
+}
+
+namespace {
+
+class CVTypeVisitor {
+public:
+ explicit CVTypeVisitor(TypeVisitorCallbacks &Callbacks);
+
+ void addTypeServerHandler(TypeServerHandler &Handler);
+
+ Error visitTypeRecord(CVType &Record, TypeIndex Index);
+ Error visitTypeRecord(CVType &Record);
+
+ /// Visits the type records in Types. Sets the error flag on parse failures.
+ Error visitTypeStream(const CVTypeArray &Types);
+ Error visitTypeStream(CVTypeRange Types);
+ Error visitTypeStream(TypeCollection &Types);
+
+ Error visitMemberRecord(CVMemberRecord Record);
+ Error visitFieldListMemberStream(BinaryStreamReader &Stream);
+
+private:
+ Expected<bool> handleTypeServer(CVType &Record);
+ Error finishVisitation(CVType &Record);
+
+ /// The interface to the class that gets notified of each visitation.
+ TypeVisitorCallbacks &Callbacks;
+
+ TinyPtrVector<TypeServerHandler *> Handlers;
+};
+
+CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks)
+ : Callbacks(Callbacks) {}
+
void CVTypeVisitor::addTypeServerHandler(TypeServerHandler &Handler) {
Handlers.push_back(&Handler);
}
@@ -144,35 +205,6 @@ Error CVTypeVisitor::visitTypeRecord(CVType &Record) {
return finishVisitation(Record);
}
-static Error visitMemberRecord(CVMemberRecord &Record,
- TypeVisitorCallbacks &Callbacks) {
- if (auto EC = Callbacks.visitMemberBegin(Record))
- return EC;
-
- switch (Record.Kind) {
- default:
- if (auto EC = Callbacks.visitUnknownMember(Record))
- return EC;
- break;
-#define MEMBER_RECORD(EnumName, EnumVal, Name) \
- case EnumName: { \
- if (auto EC = visitKnownMember<Name##Record>(Record, Callbacks)) \
- return EC; \
- break; \
- }
-#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \
- MEMBER_RECORD(EnumVal, EnumVal, AliasName)
-#define TYPE_RECORD(EnumName, EnumVal, Name)
-#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
-#include "llvm/DebugInfo/CodeView/TypeRecords.def"
- }
-
- if (auto EC = Callbacks.visitMemberEnd(Record))
- return EC;
-
- return Error::success();
-}
-
Error CVTypeVisitor::visitMemberRecord(CVMemberRecord Record) {
return ::visitMemberRecord(Record, Callbacks);
}
@@ -194,12 +226,18 @@ Error CVTypeVisitor::visitTypeStream(CVTypeRange Types) {
return Error::success();
}
-Error CVTypeVisitor::visitFieldListMemberStream(BinaryStreamReader Reader) {
- FieldListDeserializer Deserializer(Reader);
- TypeVisitorCallbackPipeline Pipeline;
- Pipeline.addCallbackToPipeline(Deserializer);
- Pipeline.addCallbackToPipeline(Callbacks);
+Error CVTypeVisitor::visitTypeStream(TypeCollection &Types) {
+ Optional<TypeIndex> I = Types.getFirst();
+ while (I) {
+ CVType Type = Types.getType(*I);
+ if (auto EC = visitTypeRecord(Type, *I))
+ return EC;
+ I = Types.getNext(*I);
+ }
+ return Error::success();
+}
+Error CVTypeVisitor::visitFieldListMemberStream(BinaryStreamReader &Reader) {
TypeLeafKind Leaf;
while (!Reader.empty()) {
if (auto EC = Reader.readEnum(Leaf))
@@ -207,20 +245,13 @@ Error CVTypeVisitor::visitFieldListMemberStream(BinaryStreamReader Reader) {
CVMemberRecord Record;
Record.Kind = Leaf;
- if (auto EC = ::visitMemberRecord(Record, Pipeline))
+ if (auto EC = ::visitMemberRecord(Record, Callbacks))
return EC;
}
return Error::success();
}
-Error CVTypeVisitor::visitFieldListMemberStream(ArrayRef<uint8_t> Data) {
- BinaryByteStream S(Data, llvm::support::little);
- BinaryStreamReader SR(S);
- return visitFieldListMemberStream(SR);
-}
-
-namespace {
struct FieldListVisitHelper {
FieldListVisitHelper(TypeVisitorCallbacks &Callbacks, ArrayRef<uint8_t> Data,
VisitorDataSource Source)
@@ -241,11 +272,8 @@ struct FieldListVisitHelper {
};
struct VisitHelper {
- VisitHelper(TypeVisitorCallbacks &Callbacks, VisitorDataSource Source,
- TypeServerHandler *TS)
+ VisitHelper(TypeVisitorCallbacks &Callbacks, VisitorDataSource Source)
: Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) {
- if (TS)
- Visitor.addTypeServerHandler(*TS);
if (Source == VDS_BytesPresent) {
Pipeline.addCallbackToPipeline(Deserializer);
Pipeline.addCallbackToPipeline(Callbacks);
@@ -262,29 +290,57 @@ Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index,
TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source,
TypeServerHandler *TS) {
- VisitHelper Helper(Callbacks, Source, TS);
- return Helper.Visitor.visitTypeRecord(Record, Index);
+ VisitHelper V(Callbacks, Source);
+ if (TS)
+ V.Visitor.addTypeServerHandler(*TS);
+ return V.Visitor.visitTypeRecord(Record, Index);
}
Error llvm::codeview::visitTypeRecord(CVType &Record,
TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source,
TypeServerHandler *TS) {
- VisitHelper Helper(Callbacks, Source, TS);
- return Helper.Visitor.visitTypeRecord(Record);
+ VisitHelper V(Callbacks, Source);
+ if (TS)
+ V.Visitor.addTypeServerHandler(*TS);
+ return V.Visitor.visitTypeRecord(Record);
}
-Error llvm::codeview::visitMemberRecordStream(ArrayRef<uint8_t> FieldList,
- TypeVisitorCallbacks &Callbacks) {
- CVTypeVisitor Visitor(Callbacks);
- return Visitor.visitFieldListMemberStream(FieldList);
+Error llvm::codeview::visitTypeStream(const CVTypeArray &Types,
+ TypeVisitorCallbacks &Callbacks,
+ TypeServerHandler *TS) {
+ VisitHelper V(Callbacks, VDS_BytesPresent);
+ if (TS)
+ V.Visitor.addTypeServerHandler(*TS);
+ return V.Visitor.visitTypeStream(Types);
+}
+
+Error llvm::codeview::visitTypeStream(CVTypeRange Types,
+ TypeVisitorCallbacks &Callbacks,
+ TypeServerHandler *TS) {
+ VisitHelper V(Callbacks, VDS_BytesPresent);
+ if (TS)
+ V.Visitor.addTypeServerHandler(*TS);
+ return V.Visitor.visitTypeStream(Types);
+}
+
+Error llvm::codeview::visitTypeStream(TypeCollection &Types,
+ TypeVisitorCallbacks &Callbacks,
+ TypeServerHandler *TS) {
+ // When the internal visitor calls Types.getType(Index), the interface is
+ // required to return a CVType with the bytes filled out, so we can assume
+ // that the bytes will be present when individual records are visited.
+ VisitHelper V(Callbacks, VDS_BytesPresent);
+ if (TS)
+ V.Visitor.addTypeServerHandler(*TS);
+ return V.Visitor.visitTypeStream(Types);
}
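+
+// A minimal usage sketch (hypothetical callback type; any TypeCollection
+// implementation, e.g. LazyRandomTypeCollection, works here):
+// \code
+//   LazyRandomTypeCollection Types(Data, /*RecordCountHint=*/0);
+//   MyTypeVisitorCallbacks Callbacks;
+//   if (auto EC = visitTypeStream(Types, Callbacks, /*TS=*/nullptr))
+//     return EC;
+// \endcode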
Error llvm::codeview::visitMemberRecord(CVMemberRecord Record,
TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source) {
- FieldListVisitHelper Helper(Callbacks, Record.Data, Source);
- return Helper.Visitor.visitMemberRecord(Record);
+ FieldListVisitHelper V(Callbacks, Record.Data, Source);
+ return V.Visitor.visitMemberRecord(Record);
}
Error llvm::codeview::visitMemberRecord(TypeLeafKind Kind,
@@ -296,16 +352,8 @@ Error llvm::codeview::visitMemberRecord(TypeLeafKind Kind,
return visitMemberRecord(R, Callbacks, VDS_BytesPresent);
}
-Error llvm::codeview::visitTypeStream(const CVTypeArray &Types,
- TypeVisitorCallbacks &Callbacks,
- TypeServerHandler *TS) {
- VisitHelper Helper(Callbacks, VDS_BytesPresent, TS);
- return Helper.Visitor.visitTypeStream(Types);
-}
-
-Error llvm::codeview::visitTypeStream(CVTypeRange Types,
- TypeVisitorCallbacks &Callbacks,
- TypeServerHandler *TS) {
- VisitHelper Helper(Callbacks, VDS_BytesPresent, TS);
- return Helper.Visitor.visitTypeStream(Types);
+Error llvm::codeview::visitMemberRecordStream(ArrayRef<uint8_t> FieldList,
+ TypeVisitorCallbacks &Callbacks) {
+ FieldListVisitHelper V(Callbacks, FieldList, VDS_BytesPresent);
+ return V.Visitor.visitFieldListMemberStream(V.Reader);
}
diff --git a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
new file mode 100644
index 000000000000..39eb4099ce9e
--- /dev/null
+++ b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
@@ -0,0 +1,229 @@
+//===- LazyRandomTypeCollection.cpp ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
+#include "llvm/DebugInfo/CodeView/TypeServerHandler.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+static void error(Error &&EC) {
+ assert(!static_cast<bool>(EC));
+ if (EC)
+ consumeError(std::move(EC));
+}
+
+LazyRandomTypeCollection::LazyRandomTypeCollection(uint32_t RecordCountHint)
+ : LazyRandomTypeCollection(CVTypeArray(), RecordCountHint,
+ PartialOffsetArray()) {}
+
+LazyRandomTypeCollection::LazyRandomTypeCollection(
+ const CVTypeArray &Types, uint32_t RecordCountHint,
+ PartialOffsetArray PartialOffsets)
+ : Database(RecordCountHint), Types(Types), DatabaseVisitor(Database),
+ PartialOffsets(PartialOffsets) {
+ KnownOffsets.resize(Database.capacity());
+}
+
+LazyRandomTypeCollection::LazyRandomTypeCollection(ArrayRef<uint8_t> Data,
+ uint32_t RecordCountHint)
+ : LazyRandomTypeCollection(RecordCountHint) {
+ reset(Data);
+}
+
+LazyRandomTypeCollection::LazyRandomTypeCollection(StringRef Data,
+ uint32_t RecordCountHint)
+ : LazyRandomTypeCollection(
+ makeArrayRef(Data.bytes_begin(), Data.bytes_end()), RecordCountHint) {
+}
+
+LazyRandomTypeCollection::LazyRandomTypeCollection(const CVTypeArray &Types,
+ uint32_t NumRecords)
+ : LazyRandomTypeCollection(Types, NumRecords, PartialOffsetArray()) {}
+
+void LazyRandomTypeCollection::reset(StringRef Data) {
+ reset(makeArrayRef(Data.bytes_begin(), Data.bytes_end()));
+}
+
+void LazyRandomTypeCollection::reset(ArrayRef<uint8_t> Data) {
+ PartialOffsets = PartialOffsetArray();
+
+ BinaryStreamReader Reader(Data, support::little);
+ error(Reader.readArray(Types, Reader.getLength()));
+
+ KnownOffsets.resize(Database.capacity());
+}
+
+CVType LazyRandomTypeCollection::getType(TypeIndex Index) {
+ error(ensureTypeExists(Index));
+ return Database.getTypeRecord(Index);
+}
+
+StringRef LazyRandomTypeCollection::getTypeName(TypeIndex Index) {
+ if (!Index.isSimple()) {
+ // Try to make sure the type exists. Even if it doesn't, that may be
+ // because we're dumping a symbol stream with no corresponding type stream
+ // present, in which case we still want to be able to print <unknown UDT>
+ // for the type names.
+ consumeError(ensureTypeExists(Index));
+ }
+
+ return Database.getTypeName(Index);
+}
+
+bool LazyRandomTypeCollection::contains(TypeIndex Index) {
+ return Database.contains(Index);
+}
+
+uint32_t LazyRandomTypeCollection::size() { return Database.size(); }
+
+uint32_t LazyRandomTypeCollection::capacity() { return Database.capacity(); }
+
+Error LazyRandomTypeCollection::ensureTypeExists(TypeIndex TI) {
+ if (!Database.contains(TI)) {
+ if (auto EC = visitRangeForType(TI))
+ return EC;
+ }
+ return Error::success();
+}
+
+Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) {
+ if (PartialOffsets.empty())
+ return fullScanForType(TI);
+
+ auto Next = std::upper_bound(PartialOffsets.begin(), PartialOffsets.end(), TI,
+ [](TypeIndex Value, const TypeIndexOffset &IO) {
+ return Value < IO.Type;
+ });
+
+ assert(Next != PartialOffsets.begin());
+ auto Prev = std::prev(Next);
+
+ TypeIndex TIB = Prev->Type;
+ if (Database.contains(TIB)) {
+ // They've asked us to fetch a type index, but the entry we found in the
+ // partial offsets array has already been visited. Since we visit an entire
+ // block every time, that means this record should have been previously
+ // discovered. Ultimately, this is a request for a non-existent type
+ // index.
+ return make_error<CodeViewError>("Invalid type index");
+ }
+
+ TypeIndex TIE;
+ if (Next == PartialOffsets.end()) {
+ TIE = TypeIndex::fromArrayIndex(Database.capacity());
+ } else {
+ TIE = Next->Type;
+ }
+
+ if (auto EC = visitRange(TIB, Prev->Offset, TIE))
+ return EC;
+ return Error::success();
+}
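+
+// Illustrative example (hypothetical numbers): if PartialOffsets holds
+// entries {Type=0x1000, Offset=0} and {Type=0x1100, Offset=0x8000}, then a
+// request for index 0x1050 selects the first entry and visits the block
+// [0x1000, 0x1100) starting at stream offset 0.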
+
+Optional<TypeIndex> LazyRandomTypeCollection::getFirst() {
+ TypeIndex TI = TypeIndex::fromArrayIndex(0);
+ if (auto EC = ensureTypeExists(TI)) {
+ consumeError(std::move(EC));
+ return None;
+ }
+ return TI;
+}
+
+Optional<TypeIndex> LazyRandomTypeCollection::getNext(TypeIndex Prev) {
+ // We can't be sure how long this type stream is, given that the initial count
+ // given to the constructor is just a hint. So just try to make sure the next
+ // record exists, and if anything goes wrong, we must be at the end.
+ if (auto EC = ensureTypeExists(Prev + 1)) {
+ consumeError(std::move(EC));
+ return None;
+ }
+
+ return Prev + 1;
+}
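+
+// Together, getFirst()/getNext() give forward iteration without knowing the
+// stream length up front. A hedged sketch, assuming Types is a
+// LazyRandomTypeCollection:
+// \code
+//   for (Optional<TypeIndex> TI = Types.getFirst(); TI; TI = Types.getNext(*TI))
+//     CVType Record = Types.getType(*TI);
+// \endcode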
+
+Error LazyRandomTypeCollection::fullScanForType(TypeIndex TI) {
+ assert(PartialOffsets.empty());
+
+ TypeIndex CurrentTI = TypeIndex::fromArrayIndex(0);
+ uint32_t Offset = 0;
+ auto Begin = Types.begin();
+
+ if (!Database.empty()) {
+ // When we don't know the number of records in the type stream, a search
+ // for a type index can trigger a full scan that discovers records beyond
+ // the original hint. A later request for one of those newly added types
+ // would otherwise trigger another full scan. To avoid that, we assume that
+ // if the database already has some records, this is what happened, so we
+ // ask the database for the largest type index less than the one we're
+ // searching for and only do the forward scan from there.
+ auto Prev = Database.largestTypeIndexLessThan(TI);
+ assert(Prev.hasValue() && "Empty database with valid types?");
+ Offset = KnownOffsets[Prev->toArrayIndex()];
+ CurrentTI = *Prev;
+ ++CurrentTI;
+ Begin = Types.at(Offset);
+ ++Begin;
+ Offset = Begin.offset();
+ }
+
+ auto End = Types.end();
+ while (Begin != End) {
+ if (auto EC = visitOneRecord(CurrentTI, Offset, *Begin))
+ return EC;
+
+ Offset += Begin.getRecordLength();
+ ++Begin;
+ ++CurrentTI;
+ }
+ if (CurrentTI <= TI) {
+ return make_error<CodeViewError>("Type Index does not exist!");
+ }
+ return Error::success();
+}
+
+Error LazyRandomTypeCollection::visitRange(TypeIndex Begin,
+ uint32_t BeginOffset,
+ TypeIndex End) {
+
+ auto RI = Types.at(BeginOffset);
+ assert(RI != Types.end());
+
+ while (Begin != End) {
+ if (auto EC = visitOneRecord(Begin, BeginOffset, *RI))
+ return EC;
+
+ BeginOffset += RI.getRecordLength();
+ ++Begin;
+ ++RI;
+ }
+
+ return Error::success();
+}
+
+Error LazyRandomTypeCollection::visitOneRecord(TypeIndex TI, uint32_t Offset,
+ CVType &Record) {
+ assert(!Database.contains(TI));
+ if (auto EC = codeview::visitTypeRecord(Record, TI, DatabaseVisitor))
+ return EC;
+ // Keep the KnownOffsets array the same size as the Database's capacity.
+ // Since we don't always know how many records are in the type stream, we
+ // need to be prepared for the database growing and receiving a type index
+ // that can't fit in our current buffer.
+ if (KnownOffsets.size() < Database.capacity())
+ KnownOffsets.resize(Database.capacity());
+ KnownOffsets[TI.toArrayIndex()] = Offset;
+ return Error::success();
+}
diff --git a/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp b/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp
deleted file mode 100644
index 704d1131108a..000000000000
--- a/lib/DebugInfo/CodeView/RandomAccessTypeVisitor.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-//===- RandomAccessTypeVisitor.cpp ---------------------------- *- C++ --*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/RandomAccessTypeVisitor.h"
-
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
-#include "llvm/DebugInfo/CodeView/TypeServerHandler.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
-
-using namespace llvm;
-using namespace llvm::codeview;
-
-RandomAccessTypeVisitor::RandomAccessTypeVisitor(
- const CVTypeArray &Types, uint32_t NumRecords,
- PartialOffsetArray PartialOffsets)
- : Database(NumRecords), Types(Types), DatabaseVisitor(Database),
- PartialOffsets(PartialOffsets) {
-
- KnownOffsets.resize(Database.capacity());
-}
-
-Error RandomAccessTypeVisitor::visitTypeIndex(TypeIndex TI,
- TypeVisitorCallbacks &Callbacks) {
- assert(TI.toArrayIndex() < Database.capacity());
-
- if (!Database.contains(TI)) {
- if (auto EC = visitRangeForType(TI))
- return EC;
- }
-
- assert(Database.contains(TI));
- auto &Record = Database.getTypeRecord(TI);
- return codeview::visitTypeRecord(Record, TI, Callbacks);
-}
-
-Error RandomAccessTypeVisitor::visitRangeForType(TypeIndex TI) {
- if (PartialOffsets.empty()) {
- TypeIndex TIB(TypeIndex::FirstNonSimpleIndex);
- TypeIndex TIE = TIB + Database.capacity();
- return visitRange(TIB, 0, TIE);
- }
-
- auto Next = std::upper_bound(PartialOffsets.begin(), PartialOffsets.end(), TI,
- [](TypeIndex Value, const TypeIndexOffset &IO) {
- return Value < IO.Type;
- });
-
- assert(Next != PartialOffsets.begin());
- auto Prev = std::prev(Next);
-
- TypeIndex TIB = Prev->Type;
- TypeIndex TIE;
- if (Next == PartialOffsets.end()) {
- TIE = TypeIndex::fromArrayIndex(Database.capacity());
- } else {
- TIE = Next->Type;
- }
-
- if (auto EC = visitRange(TIB, Prev->Offset, TIE))
- return EC;
- return Error::success();
-}
-
-Error RandomAccessTypeVisitor::visitRange(TypeIndex Begin, uint32_t BeginOffset,
- TypeIndex End) {
-
- auto RI = Types.at(BeginOffset);
- assert(RI != Types.end());
-
- while (Begin != End) {
- assert(!Database.contains(Begin));
- if (auto EC = codeview::visitTypeRecord(*RI, Begin, DatabaseVisitor))
- return EC;
- KnownOffsets[Begin.toArrayIndex()] = BeginOffset;
-
- BeginOffset += RI.getRecordLength();
- ++Begin;
- ++RI;
- }
-
- return Error::success();
-}
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 5395e4349b28..7d01c8c5f194 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -11,7 +11,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
-#include "llvm/DebugInfo/CodeView/CVTypeDumper.h"
#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/CodeView/StringTable.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
@@ -33,9 +32,9 @@ namespace {
/// the visitor out of SymbolDumper.h.
class CVSymbolDumperImpl : public SymbolVisitorCallbacks {
public:
- CVSymbolDumperImpl(TypeDatabase &TypeDB, SymbolDumpDelegate *ObjDelegate,
+ CVSymbolDumperImpl(TypeCollection &Types, SymbolDumpDelegate *ObjDelegate,
ScopedPrinter &W, bool PrintRecordBytes)
- : TypeDB(TypeDB), ObjDelegate(ObjDelegate), W(W),
+ : Types(Types), ObjDelegate(ObjDelegate), W(W),
PrintRecordBytes(PrintRecordBytes), InFunctionScope(false) {}
/// CVSymbolVisitor overrides.
@@ -54,7 +53,7 @@ private:
void printLocalVariableAddrGap(ArrayRef<LocalVariableAddrGap> Gaps);
void printTypeIndex(StringRef FieldName, TypeIndex TI);
- TypeDatabase &TypeDB;
+ TypeCollection &Types;
SymbolDumpDelegate *ObjDelegate;
ScopedPrinter &W;
@@ -83,7 +82,7 @@ void CVSymbolDumperImpl::printLocalVariableAddrGap(
}
void CVSymbolDumperImpl::printTypeIndex(StringRef FieldName, TypeIndex TI) {
- CVTypeDumper::printTypeIndex(W, FieldName, TI, TypeDB);
+ codeview::printTypeIndex(W, FieldName, TI, Types);
}
Error CVSymbolDumperImpl::visitSymbolBegin(CVSymbol &CVR) {
@@ -670,7 +669,7 @@ Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) {
Error CVSymbolDumper::dump(CVRecord<SymbolKind> &Record) {
SymbolVisitorCallbackPipeline Pipeline;
SymbolDeserializer Deserializer(ObjDelegate.get());
- CVSymbolDumperImpl Dumper(TypeDB, ObjDelegate.get(), W, PrintRecordBytes);
+ CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes);
Pipeline.addCallbackToPipeline(Deserializer);
Pipeline.addCallbackToPipeline(Dumper);
@@ -681,7 +680,7 @@ Error CVSymbolDumper::dump(CVRecord<SymbolKind> &Record) {
Error CVSymbolDumper::dump(const CVSymbolArray &Symbols) {
SymbolVisitorCallbackPipeline Pipeline;
SymbolDeserializer Deserializer(ObjDelegate.get());
- CVSymbolDumperImpl Dumper(TypeDB, ObjDelegate.get(), W, PrintRecordBytes);
+ CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes);
Pipeline.addCallbackToPipeline(Deserializer);
Pipeline.addCallbackToPipeline(Dumper);
diff --git a/lib/DebugInfo/CodeView/TypeDatabase.cpp b/lib/DebugInfo/CodeView/TypeDatabase.cpp
index 7924440e5e29..af05d2dc294b 100644
--- a/lib/DebugInfo/CodeView/TypeDatabase.cpp
+++ b/lib/DebugInfo/CodeView/TypeDatabase.cpp
@@ -72,16 +72,20 @@ TypeDatabase::TypeDatabase(uint32_t Capacity) : TypeNameStorage(Allocator) {
}
TypeIndex TypeDatabase::appendType(StringRef Name, const CVType &Data) {
- TypeIndex TI;
- TI = getAppendIndex();
- if (TI.toArrayIndex() >= capacity())
+ LargestTypeIndex = getAppendIndex();
+ if (LargestTypeIndex.toArrayIndex() >= capacity())
grow();
- recordType(Name, TI, Data);
- return TI;
+ recordType(Name, LargestTypeIndex, Data);
+ return LargestTypeIndex;
}
void TypeDatabase::recordType(StringRef Name, TypeIndex Index,
const CVType &Data) {
+ LargestTypeIndex = empty() ? Index : std::max(Index, LargestTypeIndex);
+
+ if (LargestTypeIndex.toArrayIndex() >= capacity())
+ grow(Index);
+
uint32_t AI = Index.toArrayIndex();
assert(!contains(Index));
@@ -144,19 +148,66 @@ uint32_t TypeDatabase::size() const { return Count; }
uint32_t TypeDatabase::capacity() const { return TypeRecords.size(); }
-void TypeDatabase::grow() {
- TypeRecords.emplace_back();
- CVUDTNames.emplace_back();
- ValidRecords.resize(ValidRecords.size() + 1);
+CVType TypeDatabase::getType(TypeIndex Index) { return getTypeRecord(Index); }
+
+StringRef TypeDatabase::getTypeName(TypeIndex Index) {
+ return static_cast<const TypeDatabase *>(this)->getTypeName(Index);
+}
+
+bool TypeDatabase::contains(TypeIndex Index) {
+ return static_cast<const TypeDatabase *>(this)->contains(Index);
+}
+
+uint32_t TypeDatabase::size() {
+ return static_cast<const TypeDatabase *>(this)->size();
+}
+
+uint32_t TypeDatabase::capacity() {
+ return static_cast<const TypeDatabase *>(this)->capacity();
+}
+
+void TypeDatabase::grow() { grow(LargestTypeIndex + 1); }
+
+void TypeDatabase::grow(TypeIndex NewIndex) {
+ uint32_t NewSize = NewIndex.toArrayIndex() + 1;
+
+ if (NewSize <= capacity())
+ return;
+
+ uint32_t NewCapacity = NewSize * 3 / 2;
+
+ TypeRecords.resize(NewCapacity);
+ CVUDTNames.resize(NewCapacity);
+ ValidRecords.resize(NewCapacity);
}
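+
+// For example, growing to hold array index 3 resizes to (3 + 1) * 3 / 2 = 6
+// slots, so a long run of appends reallocates logarithmically rather than
+// once per record. (Illustrative arithmetic only.)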
bool TypeDatabase::empty() const { return size() == 0; }
+Optional<TypeIndex> TypeDatabase::largestTypeIndexLessThan(TypeIndex TI) const {
+ uint32_t AI = TI.toArrayIndex();
+ int N = ValidRecords.find_prev(AI);
+ if (N == -1)
+ return None;
+ return TypeIndex::fromArrayIndex(N);
+}
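+
+// Illustrative example: with valid records at array indices {0, 2, 5}, a
+// query at array index 4 returns the TypeIndex for array index 2, since
+// find_prev searches strictly below the given index.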
+
TypeIndex TypeDatabase::getAppendIndex() const {
if (empty())
return TypeIndex::fromArrayIndex(0);
- int Index = ValidRecords.find_last();
- assert(Index != -1);
- return TypeIndex::fromArrayIndex(Index) + 1;
+ return LargestTypeIndex + 1;
+}
+
+Optional<TypeIndex> TypeDatabase::getFirst() {
+ int N = ValidRecords.find_first();
+ if (N == -1)
+ return None;
+ return TypeIndex::fromArrayIndex(N);
+}
+
+Optional<TypeIndex> TypeDatabase::getNext(TypeIndex Prev) {
+ int N = ValidRecords.find_next(Prev.toArrayIndex());
+ if (N == -1)
+ return None;
+ return TypeIndex::fromArrayIndex(N);
}
diff --git a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
index 9485c9cfedff..84f52a055815 100644
--- a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
+++ b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
@@ -10,15 +10,13 @@
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/DebugInfo/CodeView/CVTypeDumper.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/Formatters.h"
+#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -165,16 +163,15 @@ static StringRef getLeafTypeName(TypeLeafKind LT) {
}
void TypeDumpVisitor::printTypeIndex(StringRef FieldName, TypeIndex TI) const {
- CVTypeDumper::printTypeIndex(*W, FieldName, TI, TypeDB);
+ codeview::printTypeIndex(*W, FieldName, TI, TpiTypes);
}
void TypeDumpVisitor::printItemIndex(StringRef FieldName, TypeIndex TI) const {
- CVTypeDumper::printTypeIndex(*W, FieldName, TI, getSourceDB());
+ codeview::printTypeIndex(*W, FieldName, TI, getSourceTypes());
}
Error TypeDumpVisitor::visitTypeBegin(CVType &Record) {
- TypeIndex TI = getSourceDB().getAppendIndex();
- return visitTypeBegin(Record, TI);
+ return visitTypeBegin(Record, TypeIndex::fromArrayIndex(TpiTypes.size()));
}
Error TypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) {
@@ -245,7 +242,7 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, StringListRecord &Strs) {
W->printNumber("NumStrings", Size);
ListScope Arguments(*W, "Strings");
for (uint32_t I = 0; I < Size; ++I) {
- printTypeIndex("String", Indices[I]);
+ printItemIndex("String", Indices[I]);
}
return Error::success();
}
diff --git a/lib/DebugInfo/CodeView/TypeIndex.cpp b/lib/DebugInfo/CodeView/TypeIndex.cpp
new file mode 100644
index 000000000000..20ba6470cd5b
--- /dev/null
+++ b/lib/DebugInfo/CodeView/TypeIndex.cpp
@@ -0,0 +1,27 @@
+//===-- TypeIndex.cpp - CodeView type index ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+
+#include "llvm/DebugInfo/CodeView/TypeCollection.h"
+#include "llvm/Support/ScopedPrinter.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+void llvm::codeview::printTypeIndex(ScopedPrinter &Printer, StringRef FieldName,
+ TypeIndex TI, TypeCollection &Types) {
+ StringRef TypeName;
+ if (!TI.isNoneType())
+ TypeName = Types.getTypeName(TI);
+ if (!TypeName.empty())
+ Printer.printHex(FieldName, TypeName, TI.getIndex());
+ else
+ Printer.printHex(FieldName, TI.getIndex());
+}
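+
+// Output sketch (format approximate): a resolvable index prints roughly as
+// "FieldName: TypeName (0x1004)", while an unresolvable one falls back to
+// "FieldName: 0x1004".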
diff --git a/lib/DebugInfo/CodeView/TypeSerializer.cpp b/lib/DebugInfo/CodeView/TypeSerializer.cpp
index fd4d1853fa54..3b061e67e05e 100644
--- a/lib/DebugInfo/CodeView/TypeSerializer.cpp
+++ b/lib/DebugInfo/CodeView/TypeSerializer.cpp
@@ -66,6 +66,31 @@ TypeSerializer::insertRecordBytesPrivate(MutableArrayRef<uint8_t> Record) {
return Result.first->getValue();
}
+TypeIndex
+TypeSerializer::insertRecordBytesWithCopy(CVType &Record,
+ MutableArrayRef<uint8_t> Data) {
+ assert(Data.size() % 4 == 0 && "Record is not aligned to 4 bytes!");
+
+ StringRef S(reinterpret_cast<const char *>(Data.data()), Data.size());
+
+ // Do a two-stage lookup / insert so that we don't have to allocate unless
+ // we're going to do an insert. This is a big memory savings.
+ auto Iter = HashedRecords.find(S);
+ if (Iter != HashedRecords.end())
+ return Iter->second;
+
+ LastTypeIndex = calcNextTypeIndex();
+ uint8_t *Copy = RecordStorage.Allocate<uint8_t>(Data.size());
+ ::memcpy(Copy, Data.data(), Data.size());
+ Data = MutableArrayRef<uint8_t>(Copy, Data.size());
+ S = StringRef(reinterpret_cast<const char *>(Data.data()), Data.size());
+ HashedRecords.insert(std::make_pair(S, LastTypeIndex));
+ SeenRecords.push_back(Data);
+ Record.RecordData = Data;
+ return LastTypeIndex;
+}
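+
+// Deduplication sketch (hypothetical call site): serializing identical bytes
+// twice yields the same index, and only the first call copies the record
+// into RecordStorage:
+// \code
+//   TypeIndex A = Serializer.insertRecordBytesWithCopy(Rec, Bytes);
+//   TypeIndex B = Serializer.insertRecordBytesWithCopy(Rec, Bytes);
+//   assert(A == B);
+// \endcode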
+
Expected<MutableArrayRef<uint8_t>>
TypeSerializer::addPadding(MutableArrayRef<uint8_t> Record) {
uint32_t Align = Record.size() % 4;
@@ -137,11 +162,9 @@ Expected<TypeIndex> TypeSerializer::visitTypeEndGetIndex(CVType &Record) {
reinterpret_cast<RecordPrefix *>(ThisRecordData.data());
Prefix->RecordLen = ThisRecordData.size() - sizeof(uint16_t);
- uint8_t *Copy = RecordStorage.Allocate<uint8_t>(ThisRecordData.size());
- ::memcpy(Copy, ThisRecordData.data(), ThisRecordData.size());
- ThisRecordData = MutableArrayRef<uint8_t>(Copy, ThisRecordData.size());
- Record = CVType(*TypeKind, ThisRecordData);
- TypeIndex InsertedTypeIndex = insertRecordBytesPrivate(ThisRecordData);
+ Record.Type = *TypeKind;
+ TypeIndex InsertedTypeIndex =
+ insertRecordBytesWithCopy(Record, ThisRecordData);
// Write out each additional segment in reverse order, and update each
// record's continuation index to point to the previous one.
diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index 51f24fa3f135..46747f8eab99 100644
--- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -11,11 +11,9 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -60,9 +58,12 @@ namespace {
class TypeStreamMerger : public TypeVisitorCallbacks {
public:
TypeStreamMerger(TypeTableBuilder &DestIdStream,
- TypeTableBuilder &DestTypeStream, TypeServerHandler *Handler)
+ TypeTableBuilder &DestTypeStream,
+ SmallVectorImpl<TypeIndex> &SourceToDest,
+ TypeServerHandler *Handler)
: DestIdStream(DestIdStream), DestTypeStream(DestTypeStream),
- FieldListBuilder(DestTypeStream), Handler(Handler) {}
+ FieldListBuilder(DestTypeStream), Handler(Handler),
+ IndexMap(SourceToDest) {}
static const TypeIndex Untranslated;
@@ -143,7 +144,7 @@ private:
/// Map from source type index to destination type index. Indexed by source
/// type index minus 0x1000.
- SmallVector<TypeIndex, 0> IndexMap;
+ SmallVectorImpl<TypeIndex> &IndexMap;
};
} // end anonymous namespace
@@ -477,8 +478,9 @@ Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) {
Error llvm::codeview::mergeTypeStreams(TypeTableBuilder &DestIdStream,
TypeTableBuilder &DestTypeStream,
+ SmallVectorImpl<TypeIndex> &SourceToDest,
TypeServerHandler *Handler,
const CVTypeArray &Types) {
- return TypeStreamMerger(DestIdStream, DestTypeStream, Handler)
+ return TypeStreamMerger(DestIdStream, DestTypeStream, SourceToDest, Handler)
.mergeStream(Types);
}
diff --git a/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
new file mode 100644
index 000000000000..a18710d6ab52
--- /dev/null
+++ b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
@@ -0,0 +1,83 @@
+//===- TypeTableCollection.cpp --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
+
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/BinaryStreamReader.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+static void error(Error &&EC) {
+ assert(!static_cast<bool>(EC));
+ if (EC)
+ consumeError(std::move(EC));
+}
+
+TypeTableCollection::TypeTableCollection(
+ ArrayRef<MutableArrayRef<uint8_t>> Records)
+ : Records(Records), Database(Records.size()) {}
+
+Optional<TypeIndex> TypeTableCollection::getFirst() {
+ if (empty())
+ return None;
+ return TypeIndex::fromArrayIndex(0);
+}
+
+Optional<TypeIndex> TypeTableCollection::getNext(TypeIndex Prev) {
+ ++Prev;
+ assert(Prev.toArrayIndex() <= size());
+ if (Prev.toArrayIndex() == size())
+ return None;
+ return Prev;
+}
+
+void TypeTableCollection::ensureTypeExists(TypeIndex Index) {
+ assert(hasCapacityFor(Index));
+
+ if (Database.contains(Index))
+ return;
+
+ BinaryByteStream Bytes(Records[Index.toArrayIndex()], support::little);
+
+ CVType Type;
+ uint32_t Len;
+ error(VarStreamArrayExtractor<CVType>::extract(Bytes, Len, Type));
+
+ TypeDatabaseVisitor DBV(Database);
+ error(codeview::visitTypeRecord(Type, Index, DBV));
+ assert(Database.contains(Index));
+}
+
+CVType TypeTableCollection::getType(TypeIndex Index) {
+ ensureTypeExists(Index);
+ return Database.getTypeRecord(Index);
+}
+
+StringRef TypeTableCollection::getTypeName(TypeIndex Index) {
+ if (!Index.isSimple())
+ ensureTypeExists(Index);
+ return Database.getTypeName(Index);
+}
+
+bool TypeTableCollection::contains(TypeIndex Index) {
+ return Database.contains(Index);
+}
+
+uint32_t TypeTableCollection::size() { return Records.size(); }
+
+uint32_t TypeTableCollection::capacity() { return Records.size(); }
+
+bool TypeTableCollection::hasCapacityFor(TypeIndex Index) const {
+ return Index.toArrayIndex() < Records.size();
+}
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 61e75a2b56ab..8e7c6c43d1a2 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -979,7 +979,7 @@ Error DWARFContextInMemory::maybeDecompress(const SectionRef &Sec,
return Decompressor.takeError();
SmallString<32> Out;
- if (auto Err = Decompressor->decompress(Out))
+ if (auto Err = Decompressor->resizeAndDecompress(Out))
return Err;
UncompressedSections.emplace_back(std::move(Out));
@@ -1063,18 +1063,20 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
// TODO: Add support for relocations in other sections as needed.
// Record relocations for the debug_info and debug_line sections.
- RelocAddrMap *Map = StringSwitch<RelocAddrMap*>(RelSecName)
- .Case("debug_info", &InfoSection.Relocs)
- .Case("debug_loc", &LocSection.Relocs)
- .Case("debug_info.dwo", &InfoDWOSection.Relocs)
- .Case("debug_line", &LineSection.Relocs)
- .Case("debug_ranges", &RangeSection.Relocs)
- .Case("apple_names", &AppleNamesSection.Relocs)
- .Case("apple_types", &AppleTypesSection.Relocs)
- .Case("apple_namespaces", &AppleNamespacesSection.Relocs)
- .Case("apple_namespac", &AppleNamespacesSection.Relocs)
- .Case("apple_objc", &AppleObjCSection.Relocs)
- .Default(nullptr);
+ RelocAddrMap *Map =
+ StringSwitch<RelocAddrMap *>(RelSecName)
+ .Case("debug_info", &InfoSection.Relocs)
+ .Case("debug_loc", &LocSection.Relocs)
+ .Case("debug_info.dwo", &InfoDWOSection.Relocs)
+ .Case("debug_line", &LineSection.Relocs)
+ .Case("debug_ranges", &RangeSection.Relocs)
+ .Case("debug_addr", &AddrSection.Relocs)
+ .Case("apple_names", &AppleNamesSection.Relocs)
+ .Case("apple_types", &AppleTypesSection.Relocs)
+ .Case("apple_namespaces", &AppleNamespacesSection.Relocs)
+ .Case("apple_namespac", &AppleNamespacesSection.Relocs)
+ .Case("apple_objc", &AppleObjCSection.Relocs)
+ .Default(nullptr);
if (!Map) {
// Find debug_types relocs by section rather than name as there are
// multiple, comdat grouped, debug_types sections.
@@ -1104,14 +1106,14 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
}
object::RelocVisitor V(Obj);
- object::RelocToApply R(V.visit(Reloc.getType(), Reloc, *SymAddrOrErr));
+ uint64_t Val = V.visit(Reloc.getType(), Reloc, *SymAddrOrErr);
if (V.error()) {
SmallString<32> Name;
Reloc.getTypeName(Name);
errs() << "error: failed to compute relocation: " << Name << "\n";
continue;
}
- Map->insert({Reloc.getOffset(), {R.Value}});
+ Map->insert({Reloc.getOffset(), {Val}});
}
}
}
@@ -1148,7 +1150,7 @@ StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) {
.Case("debug_line.dwo", &LineDWOSection.Data)
.Case("debug_str.dwo", &StringDWOSection)
.Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
- .Case("debug_addr", &AddrSection)
+ .Case("debug_addr", &AddrSection.Data)
.Case("apple_names", &AppleNamesSection.Data)
.Case("apple_types", &AppleTypesSection.Data)
.Case("apple_namespaces", &AppleNamespacesSection.Data)
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 3835d4da9ae9..c268afc222c3 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -33,7 +33,7 @@ using namespace dwarf;
void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
parseImpl(C, Section, C.getDebugAbbrev(), &C.getRangeSection(),
- C.getStringSection(), StringRef(), C.getAddrSection(),
+ C.getStringSection(), StringRef(), &C.getAddrSection(),
C.getLineSection().Data, C.isLittleEndian(), false);
}
@@ -42,14 +42,14 @@ void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
DWARFUnitIndex *Index) {
parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), &C.getRangeDWOSection(),
C.getStringDWOSection(), C.getStringOffsetDWOSection(),
- C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian(),
+ &C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian(),
true);
}
DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
const DWARFDebugAbbrev *DA, const DWARFSection *RS,
- StringRef SS, StringRef SOS, StringRef AOS, StringRef LS,
- bool LE, bool IsDWO,
+ StringRef SS, StringRef SOS, const DWARFSection *AOS,
+ StringRef LS, bool LE, bool IsDWO,
const DWARFUnitSectionBase &UnitSection,
const DWARFUnitIndex::Entry *IndexEntry)
: Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS),
@@ -69,10 +69,10 @@ DWARFUnit::~DWARFUnit() = default;
bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index,
uint64_t &Result) const {
uint32_t Offset = AddrOffsetSectionBase + Index * AddrSize;
- if (AddrOffsetSection.size() < Offset + AddrSize)
+ if (AddrOffsetSection->Data.size() < Offset + AddrSize)
return false;
- DataExtractor DA(AddrOffsetSection, isLittleEndian, AddrSize);
- Result = DA.getAddress(&Offset);
+ DataExtractor DA(AddrOffsetSection->Data, isLittleEndian, AddrSize);
+ Result = getRelocatedValue(DA, AddrSize, &Offset, &AddrOffsetSection->Relocs);
return true;
}
@@ -249,7 +249,7 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
return DieArray.size();
}
-DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath) {
+DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath, uint64_t DWOId) {
auto Obj = object::ObjectFile::createObjectFile(DWOPath);
if (!Obj) {
// TODO: Actually report errors helpfully.
@@ -259,8 +259,11 @@ DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath) {
DWOFile = std::move(Obj.get());
DWOContext.reset(
cast<DWARFContext>(new DWARFContextInMemory(*DWOFile.getBinary())));
- if (DWOContext->getNumDWOCompileUnits() > 0)
- DWOU = DWOContext->getDWOCompileUnitAtIndex(0);
+ for (const auto &DWOCU : DWOContext->dwo_compile_units())
+ if (DWOCU->getDWOId() == DWOId) {
+ DWOU = DWOCU.get();
+ return;
+ }
}
bool DWARFUnit::parseDWO() {
@@ -281,10 +284,12 @@ bool DWARFUnit::parseDWO() {
sys::path::append(AbsolutePath, *CompilationDir);
}
sys::path::append(AbsolutePath, *DWOFileName);
- DWO = llvm::make_unique<DWOHolder>(AbsolutePath);
+ auto DWOId = getDWOId();
+ if (!DWOId)
+ return false;
+ DWO = llvm::make_unique<DWOHolder>(AbsolutePath, *DWOId);
DWARFUnit *DWOCU = DWO->getUnit();
- // Verify that compile unit in .dwo file is valid.
- if (!DWOCU || DWOCU->getDWOId() != getDWOId()) {
+ if (!DWOCU) {
DWO.reset();
return false;
}
diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp
index f7538c580ba4..2f4fb6cc295d 100644
--- a/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -72,14 +72,6 @@ Error DbiStream::reload() {
return make_error<RawError>(raw_error_code::feature_unsupported,
"Unsupported DBI version.");
- auto IS = Pdb.getPDBInfoStream();
- if (!IS)
- return IS.takeError();
-
- if (Header->Age != IS->getAge())
- return make_error<RawError>(raw_error_code::corrupt_file,
- "DBI Age does not match PDB Age.");
-
if (Stream->getLength() !=
sizeof(DbiStreamHeader) + Header->ModiSubstreamSize +
Header->SecContrSubstreamSize + Header->SectionMapSize +
diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index 4dd965c69071..c6568029ec55 100644
--- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -117,6 +117,7 @@ Expected<uint32_t> PDBFileBuilder::getNamedStreamIndex(StringRef Name) const {
}
Error PDBFileBuilder::commit(StringRef Filename) {
+ assert(!Filename.empty());
auto ExpectedLayout = finalizeMsfLayout();
if (!ExpectedLayout)
return ExpectedLayout.takeError();
diff --git a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
index cb783cf4fea7..f00567db743e 100644
--- a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
@@ -21,6 +21,7 @@
#include "llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h"
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp
index c0999d93dbb9..8e0065873892 100644
--- a/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -9,10 +9,7 @@
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h"
@@ -91,9 +88,6 @@ Error TpiStream::reload() {
HSR.setOffset(Header->HashValueBuffer.Off);
if (auto EC = HSR.readArray(HashValues, NumHashValues))
return EC;
- std::vector<ulittle32_t> HashValueList;
- for (auto I : HashValues)
- HashValueList.push_back(I);
HSR.setOffset(Header->IndexOffsetBuffer.Off);
uint32_t NumTypeIndexOffsets =
diff --git a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
index 701a318511b8..20456cc97823 100644
--- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -69,7 +69,7 @@ Error TpiStreamBuilder::finalize() {
uint32_t Count = TypeRecords.size();
- H->Version = *VerHeader;
+ H->Version = VerHeader;
H->HeaderSize = sizeof(TpiStreamHeader);
H->TypeIndexBegin = codeview::TypeIndex::FirstNonSimpleIndex;
H->TypeIndexEnd = H->TypeIndexBegin + Count;
diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp
index 49dbe74d25df..f454ae61d965 100644
--- a/lib/Demangle/ItaniumDemangle.cpp
+++ b/lib/Demangle/ItaniumDemangle.cpp
@@ -1947,7 +1947,7 @@ static const char *parse_type(const char *first, const char *last, C &db) {
break;
}
}
- // drop through
+ // falls through
default:
// must check for builtin-types before class-enum-types to avoid
// ambiguities with operator-names
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index ce60367a6c8b..adb31d127a2e 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -1058,7 +1058,7 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
#ifndef NDEBUG
// FIXME it is not obvious how this should work for alignment. For now, say
// we can't change a known alignment.
- unsigned OldAlign = getParamAlignment(Index);
+ unsigned OldAlign = getAttributes(Index).getAlignment();
unsigned NewAlign = B.getAlignment();
assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
"Attempt to change alignment!");
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index 8bcba7672315..06934b365a11 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -521,6 +521,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
return true;
}
}
+ break;
}
case 'o':
// We only need to change the name to match the mangling including the
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 4b9d89cda539..8b0ff66334a7 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -37,10 +37,6 @@ using namespace llvm;
// Constant Class
//===----------------------------------------------------------------------===//
-void Constant::anchor() { }
-
-void ConstantData::anchor() {}
-
bool Constant::isNegativeZeroValue() const {
// Floating point values have an explicit -0.0 value.
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
@@ -496,8 +492,6 @@ void Constant::removeDeadConstantUsers() const {
// ConstantInt
//===----------------------------------------------------------------------===//
-void ConstantInt::anchor() { }
-
ConstantInt::ConstantInt(IntegerType *Ty, const APInt &V)
: ConstantData(Ty, ConstantIntVal), Val(V) {
assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
@@ -610,8 +604,6 @@ static const fltSemantics *TypeToFloatSemantics(Type *Ty) {
return &APFloat::PPCDoubleDouble();
}
-void ConstantFP::anchor() { }
-
Constant *ConstantFP::get(Type *Ty, double V) {
LLVMContext &Context = Ty->getContext();
@@ -2266,9 +2258,6 @@ Type *GetElementPtrConstantExpr::getResultElementType() const {
//===----------------------------------------------------------------------===//
// ConstantData* implementations
-void ConstantDataArray::anchor() {}
-void ConstantDataVector::anchor() {}
-
Type *ConstantDataSequential::getElementType() const {
return getType()->getElementType();
}
@@ -2627,8 +2616,8 @@ Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const {
return ConstantInt::get(getElementType(), getElementAsInteger(Elt));
}
-bool ConstantDataSequential::isString() const {
- return isa<ArrayType>(getType()) && getElementType()->isIntegerTy(8);
+bool ConstantDataSequential::isString(unsigned CharSize) const {
+ return isa<ArrayType>(getType()) && getElementType()->isIntegerTy(CharSize);
}
bool ConstantDataSequential::isCString() const {
diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h
index 25eb9452d9d0..6c189cf656de 100644
--- a/lib/IR/ConstantsContext.h
+++ b/lib/IR/ConstantsContext.h
@@ -44,8 +44,6 @@ namespace llvm {
/// UnaryConstantExpr - This class is private to Constants.cpp, and is used
/// behind the scenes to implement unary constant exprs.
class UnaryConstantExpr : public ConstantExpr {
- void anchor() override;
-
public:
UnaryConstantExpr(unsigned Opcode, Constant *C, Type *Ty)
: ConstantExpr(Ty, Opcode, &Op<0>(), 1) {
@@ -65,8 +63,6 @@ public:
/// BinaryConstantExpr - This class is private to Constants.cpp, and is used
/// behind the scenes to implement binary constant exprs.
class BinaryConstantExpr : public ConstantExpr {
- void anchor() override;
-
public:
BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2,
unsigned Flags)
@@ -90,8 +86,6 @@ public:
/// SelectConstantExpr - This class is private to Constants.cpp, and is used
/// behind the scenes to implement select constant exprs.
class SelectConstantExpr : public ConstantExpr {
- void anchor() override;
-
public:
SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3)
: ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) {
@@ -115,8 +109,6 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// extractelement constant exprs.
class ExtractElementConstantExpr : public ConstantExpr {
- void anchor() override;
-
public:
ExtractElementConstantExpr(Constant *C1, Constant *C2)
: ConstantExpr(cast<VectorType>(C1->getType())->getElementType(),
@@ -140,8 +132,6 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// insertelement constant exprs.
class InsertElementConstantExpr : public ConstantExpr {
- void anchor() override;
-
public:
InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3)
: ConstantExpr(C1->getType(), Instruction::InsertElement,
@@ -166,8 +156,6 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// shufflevector constant exprs.
class ShuffleVectorConstantExpr : public ConstantExpr {
- void anchor() override;
-
public:
ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3)
: ConstantExpr(VectorType::get(
@@ -195,8 +183,6 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// extractvalue constant exprs.
class ExtractValueConstantExpr : public ConstantExpr {
- void anchor() override;
-
public:
ExtractValueConstantExpr(Constant *Agg, ArrayRef<unsigned> IdxList,
Type *DestTy)
@@ -230,8 +216,6 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// insertvalue constant exprs.
class InsertValueConstantExpr : public ConstantExpr {
- void anchor() override;
-
public:
InsertValueConstantExpr(Constant *Agg, Constant *Val,
ArrayRef<unsigned> IdxList, Type *DestTy)
@@ -271,8 +255,6 @@ class GetElementPtrConstantExpr : public ConstantExpr {
GetElementPtrConstantExpr(Type *SrcElementTy, Constant *C,
ArrayRef<Constant *> IdxList, Type *DestTy);
- void anchor() override;
-
public:
static GetElementPtrConstantExpr *Create(Type *SrcElementTy, Constant *C,
ArrayRef<Constant *> IdxList,
@@ -301,8 +283,6 @@ public:
// behind the scenes to implement ICmp and FCmp constant expressions. This is
// needed in order to store the predicate value for these instructions.
class CompareConstantExpr : public ConstantExpr {
- void anchor() override;
-
public:
unsigned short predicate;
CompareConstantExpr(Type *ty, Instruction::OtherOps opc,
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index c117d29b7f69..d5e29649a237 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -307,7 +307,7 @@ void DataLayout::parseSpecifier(StringRef Desc) {
case 'a': {
AlignTypeEnum AlignType;
switch (Specifier) {
- default:
+ default: llvm_unreachable("Unexpected specifier!");
case 'i': AlignType = INTEGER_ALIGN; break;
case 'v': AlignType = VECTOR_ALIGN; break;
case 'f': AlignType = FLOAT_ALIGN; break;
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index 16a9e51b8306..39de4b0a97fa 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -66,8 +66,6 @@ template class llvm::SymbolTableListTraits<BasicBlock>;
// Argument Implementation
//===----------------------------------------------------------------------===//
-void Argument::anchor() {}
-
Argument::Argument(Type *Ty, const Twine &Name, Function *Par, unsigned ArgNo)
: Value(Ty, Value::ArgumentVal), Parent(Par), ArgNo(ArgNo) {
setName(Name);
diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
index 3477c087967f..7572d0c6b3bc 100644
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@@ -381,8 +381,11 @@ CallInst *IRBuilderBase::CreateMaskedGather(Value *Ptrs, unsigned Align,
Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context),
NumElts));
+ if (!PassThru)
+ PassThru = UndefValue::get(DataTy);
+
Type *OverloadedTypes[] = {DataTy, PtrsTy};
- Value * Ops[] = {Ptrs, getInt32(Align), Mask, UndefValue::get(DataTy)};
+ Value * Ops[] = {Ptrs, getInt32(Align), Mask, PassThru};
// We specify only one type when we create this intrinsic. Types of other
// arguments are derived from this type.
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
index 8feeeb65d445..6c0c5a267f81 100644
--- a/lib/IR/InlineAsm.cpp
+++ b/lib/IR/InlineAsm.cpp
@@ -40,10 +40,6 @@ InlineAsm::InlineAsm(FunctionType *FTy, const std::string &asmString,
"Function type not legal for constraints!");
}
-// Implement the first virtual method in this class in this file so the
-// InlineAsm vtable is emitted here.
-InlineAsm::~InlineAsm() = default;
-
InlineAsm *InlineAsm::get(FunctionType *FTy, StringRef AsmString,
StringRef Constraints, bool hasSideEffects,
bool isAlignStack, AsmDialect asmDialect) {
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index 91b9d9232b54..828e78b13005 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -43,8 +43,6 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
InsertAtEnd->getInstList().push_back(this);
}
-
-// Out of line virtual method, so the vtable, etc has a home.
Instruction::~Instruction() {
assert(!Parent && "Instruction still linked in the program!");
if (hasMetadataHashEntry())
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 5a5b9c0d06bb..01d4ed6c8eef 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -59,9 +59,6 @@ User::op_iterator CallSite::getCallee() const {
// TerminatorInst Class
//===----------------------------------------------------------------------===//
-// Out of line virtual method, so the vtable, etc has a home.
-TerminatorInst::~TerminatorInst() = default;
-
unsigned TerminatorInst::getNumSuccessors() const {
switch (getOpcode()) {
#define HANDLE_TERM_INST(N, OPC, CLASS) \
@@ -99,13 +96,6 @@ void TerminatorInst::setSuccessor(unsigned idx, BasicBlock *B) {
}
//===----------------------------------------------------------------------===//
-// UnaryInstruction Class
-//===----------------------------------------------------------------------===//
-
-// Out of line virtual method, so the vtable, etc has a home.
-UnaryInstruction::~UnaryInstruction() = default;
-
-//===----------------------------------------------------------------------===//
// SelectInst Class
//===----------------------------------------------------------------------===//
@@ -138,8 +128,6 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
// PHINode Class
//===----------------------------------------------------------------------===//
-void PHINode::anchor() {}
-
PHINode::PHINode(const PHINode &PN)
: Instruction(PN.getType(), Instruction::PHI, nullptr, PN.getNumOperands()),
ReservedSpace(PN.getNumOperands()) {
@@ -293,8 +281,6 @@ void LandingPadInst::addClause(Constant *Val) {
// CallInst Implementation
//===----------------------------------------------------------------------===//
-CallInst::~CallInst() = default;
-
void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr) {
this->FTy = FTy;
@@ -900,8 +886,6 @@ BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const {
llvm_unreachable("ReturnInst has no successors!");
}
-ReturnInst::~ReturnInst() = default;
-
//===----------------------------------------------------------------------===//
// ResumeInst Implementation
//===----------------------------------------------------------------------===//
@@ -1337,9 +1321,6 @@ AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
setName(Name);
}
-// Out of line virtual method, so the vtable, etc has a home.
-AllocaInst::~AllocaInst() = default;
-
void AllocaInst::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
assert(Align <= MaximumAlignment &&
@@ -1689,8 +1670,6 @@ FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
// GetElementPtrInst Implementation
//===----------------------------------------------------------------------===//
-void GetElementPtrInst::anchor() {}
-
void GetElementPtrInst::init(Value *Ptr, ArrayRef<Value *> IdxList,
const Twine &Name) {
assert(getNumOperands() == 1 + IdxList.size() &&
@@ -2357,8 +2336,6 @@ float FPMathOperator::getFPAccuracy() const {
// CastInst Class
//===----------------------------------------------------------------------===//
-void CastInst::anchor() {}
-
// Just determine if this cast only deals with integral->integral conversion.
bool CastInst::isIntegerCast() const {
switch (getOpcode()) {
@@ -3387,8 +3364,6 @@ AddrSpaceCastInst::AddrSpaceCastInst(
// CmpInst Classes
//===----------------------------------------------------------------------===//
-void CmpInst::anchor() {}
-
CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS,
Value *RHS, const Twine &Name, Instruction *InsertBefore)
: Instruction(ty, op,
@@ -3528,8 +3503,6 @@ StringRef CmpInst::getPredicateName(Predicate Pred) {
}
}
-void ICmpInst::anchor() {}
-
ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
switch (pred) {
default: llvm_unreachable("Unknown icmp predicate!");
diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp
index 343722463e5f..4a30d28c3913 100644
--- a/lib/IR/LLVMContextImpl.cpp
+++ b/lib/IR/LLVMContextImpl.cpp
@@ -215,27 +215,6 @@ uint32_t LLVMContextImpl::getOperandBundleTagID(StringRef Tag) const {
return I->second;
}
-// ConstantsContext anchors
-void UnaryConstantExpr::anchor() { }
-
-void BinaryConstantExpr::anchor() { }
-
-void SelectConstantExpr::anchor() { }
-
-void ExtractElementConstantExpr::anchor() { }
-
-void InsertElementConstantExpr::anchor() { }
-
-void ShuffleVectorConstantExpr::anchor() { }
-
-void ExtractValueConstantExpr::anchor() { }
-
-void InsertValueConstantExpr::anchor() { }
-
-void GetElementPtrConstantExpr::anchor() { }
-
-void CompareConstantExpr::anchor() { }
-
/// Singleton instance of the OptBisect class.
///
/// This singleton is accessed via the LLVMContext::getOptBisect() function. It
diff --git a/lib/IR/PassRegistry.cpp b/lib/IR/PassRegistry.cpp
index 584dee2869c1..c0f6f07169ff 100644
--- a/lib/IR/PassRegistry.cpp
+++ b/lib/IR/PassRegistry.cpp
@@ -105,8 +105,6 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
ImplementationInfo->getNormalCtor() &&
"Cannot specify pass as default if it does not have a default ctor");
InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
- InterfaceInfo->setTargetMachineCtor(
- ImplementationInfo->getTargetMachineCtor());
}
}
diff --git a/lib/IR/User.cpp b/lib/IR/User.cpp
index 497b4aa17643..d46039107f33 100644
--- a/lib/IR/User.cpp
+++ b/lib/IR/User.cpp
@@ -19,8 +19,6 @@ class BasicBlock;
// User Class
//===----------------------------------------------------------------------===//
-void User::anchor() {}
-
void User::replaceUsesOfWith(Value *From, Value *To) {
if (From == To) return; // Duh what?
@@ -193,12 +191,4 @@ void User::operator delete(void *Usr) {
}
}
-//===----------------------------------------------------------------------===//
-// Operator Class
-//===----------------------------------------------------------------------===//
-
-Operator::~Operator() {
- llvm_unreachable("should never destroy an Operator");
-}
-
} // End llvm namespace
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index 02b40c93b5d8..51a7d424c1f3 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DerivedUser.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
@@ -59,7 +60,7 @@ Value::Value(Type *ty, unsigned scid)
(SubclassID < ConstantFirstVal || SubclassID > ConstantLastVal))
assert((VTy->isFirstClassType() || VTy->isVoidTy()) &&
"Cannot create non-first-class values except for constants!");
- static_assert(sizeof(Value) == 3 * sizeof(void *) + 2 * sizeof(unsigned),
+ static_assert(sizeof(Value) == 2 * sizeof(void *) + 2 * sizeof(unsigned),
"Value too big");
}
@@ -89,6 +90,32 @@ Value::~Value() {
destroyValueName();
}
+void Value::deleteValue() {
+ switch (getValueID()) {
+#define HANDLE_VALUE(Name) \
+ case Value::Name##Val: \
+ delete static_cast<Name *>(this); \
+ break;
+#define HANDLE_MEMORY_VALUE(Name) \
+ case Value::Name##Val: \
+ static_cast<DerivedUser *>(this)->DeleteValue( \
+ static_cast<DerivedUser *>(this)); \
+ break;
+#define HANDLE_INSTRUCTION(Name) /* nothing */
+#include "llvm/IR/Value.def"
+
+#define HANDLE_INST(N, OPC, CLASS) \
+ case Value::InstructionVal + Instruction::OPC: \
+ delete static_cast<CLASS *>(this); \
+ break;
+#define HANDLE_USER_INST(N, OPC, CLASS)
+#include "llvm/IR/Instruction.def"
+
+ default:
+ llvm_unreachable("attempting to delete unknown value kind");
+ }
+}
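+
+// Note: with Value's virtual anchors and out-of-line destructors removed in
+// this change, owning code calls V->deleteValue() instead of `delete V`; the
+// switch above recovers the dynamic type from getValueID().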
+
void Value::destroyValueName() {
ValueName *Name = getValueName();
if (Name)
diff --git a/lib/IR/ValueTypes.cpp b/lib/IR/ValueTypes.cpp
index 2132e1659225..cf6ee063c2d5 100644
--- a/lib/IR/ValueTypes.cpp
+++ b/lib/IR/ValueTypes.cpp
@@ -142,6 +142,7 @@ std::string EVT::getEVTString() const {
case MVT::Other: return "ch";
case MVT::Glue: return "glue";
case MVT::x86mmx: return "x86mmx";
+ case MVT::v1i1: return "v1i1";
case MVT::v2i1: return "v2i1";
case MVT::v4i1: return "v4i1";
case MVT::v8i1: return "v8i1";
@@ -220,6 +221,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::f128: return Type::getFP128Ty(Context);
case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
case MVT::x86mmx: return Type::getX86_MMXTy(Context);
+ case MVT::v1i1: return VectorType::get(Type::getInt1Ty(Context), 1);
case MVT::v2i1: return VectorType::get(Type::getInt1Ty(Context), 2);
case MVT::v4i1: return VectorType::get(Type::getInt1Ty(Context), 4);
case MVT::v8i1: return VectorType::get(Type::getInt1Ty(Context), 8);
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 3b68d6365872..21e8048442be 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -1317,6 +1317,12 @@ Verifier::visitModuleFlag(const MDNode *Op,
Assert(Inserted,
"module flag identifiers must be unique (or of 'require' type)", ID);
}
+
+ if (ID->getString() == "wchar_size") {
+    ConstantInt *Value =
+        mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2));
+ Assert(Value, "wchar_size metadata requires constant integer argument");
+ }
}
/// Return true if this attribute kind only applies to functions.
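
For reference, a module flag that satisfies the new check looks like this in textual IR (a hedged example; the leading i32 1 selects Error merge behavior, and 4 is one common wchar_t width):

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"wchar_size", i32 4}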
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index 65a7994325bc..ca3fc60f9501 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -25,9 +25,11 @@
#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Linker/Linker.h"
@@ -62,6 +64,7 @@ namespace llvm {
extern cl::opt<bool> LTODiscardValueNames;
extern cl::opt<std::string> LTORemarksFilename;
extern cl::opt<bool> LTOPassRemarksWithHotness;
+extern cl::opt<bool> LTOStripInvalidDebugInfo;
}
namespace {
@@ -142,6 +145,30 @@ static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) {
report_fatal_error("renameModuleForThinLTO failed");
}
+namespace {
+class ThinLTODiagnosticInfo : public DiagnosticInfo {
+ const Twine &Msg;
+public:
+ ThinLTODiagnosticInfo(const Twine &DiagMsg,
+ DiagnosticSeverity Severity = DS_Error)
+ : DiagnosticInfo(DK_Linker, Severity), Msg(DiagMsg) {}
+ void print(DiagnosticPrinter &DP) const override { DP << Msg; }
+};
+}
+
+/// Verify the module and strip broken debug info.
+static void verifyLoadedModule(Module &TheModule) {
+ bool BrokenDebugInfo = false;
+ if (verifyModule(TheModule, &dbgs(),
+ LTOStripInvalidDebugInfo ? &BrokenDebugInfo : nullptr))
+ report_fatal_error("Broken module found, compilation aborted!");
+ if (BrokenDebugInfo) {
+ TheModule.getContext().diagnose(ThinLTODiagnosticInfo(
+ "Invalid debug info found, debug info will be stripped", DS_Warning));
+ StripDebugInfo(TheModule);
+ }
+}
+
static std::unique_ptr<Module>
loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
bool Lazy, bool IsImporting) {
@@ -159,6 +186,8 @@ loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
});
report_fatal_error("Can't load module, abort.");
}
+ if (!Lazy)
+ verifyLoadedModule(*ModuleOrErr.get());
return std::move(ModuleOrErr.get());
}
@@ -181,6 +210,8 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
});
report_fatal_error("importFunctions failed");
}
+ // Verify again after cross-importing.
+ verifyLoadedModule(TheModule);
}
static void optimizeModule(Module &TheModule, TargetMachine &TM,
@@ -195,7 +226,8 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM,
PMB.OptLevel = OptLevel;
PMB.LoopVectorize = true;
PMB.SLPVectorize = true;
- PMB.VerifyInput = true;
+ // Already did this in verifyLoadedModule().
+ PMB.VerifyInput = false;
PMB.VerifyOutput = false;
legacy::PassManager PM;
@@ -505,29 +537,25 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder,
void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
ThinLTOBuffer Buffer(Data, Identifier);
- if (Modules.empty()) {
- // First module added, so initialize the triple and some options
- LLVMContext Context;
- StringRef TripleStr;
- ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
- Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
- if (TripleOrErr)
- TripleStr = *TripleOrErr;
- Triple TheTriple(TripleStr);
+ LLVMContext Context;
+ StringRef TripleStr;
+ ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
+ Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
+
+ if (TripleOrErr)
+ TripleStr = *TripleOrErr;
+
+ Triple TheTriple(TripleStr);
+
+ if (Modules.empty())
initTMBuilder(TMBuilder, Triple(TheTriple));
+ else if (TMBuilder.TheTriple != TheTriple) {
+ if (!TMBuilder.TheTriple.isCompatibleWith(TheTriple))
+ report_fatal_error("ThinLTO modules with incompatible triples not "
+ "supported");
+ initTMBuilder(TMBuilder, Triple(TMBuilder.TheTriple.merge(TheTriple)));
}
-#ifndef NDEBUG
- else {
- LLVMContext Context;
- StringRef TripleStr;
- ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
- Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
- if (TripleOrErr)
- TripleStr = *TripleOrErr;
- assert(TMBuilder.TheTriple.str() == TripleStr &&
- "ThinLTO modules with different triple not supported");
- }
-#endif
+
Modules.push_back(Buffer);
}
diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp
index ecef1efda1a2..c0af21aa148c 100644
--- a/lib/Linker/IRMover.cpp
+++ b/lib/Linker/IRMover.cpp
@@ -1243,27 +1243,6 @@ Error IRLinker::linkModuleFlagsMetadata() {
return Error::success();
}
-// This function returns true if the triples match.
-static bool triplesMatch(const Triple &T0, const Triple &T1) {
- // If vendor is apple, ignore the version number.
- if (T0.getVendor() == Triple::Apple)
- return T0.getArch() == T1.getArch() && T0.getSubArch() == T1.getSubArch() &&
- T0.getVendor() == T1.getVendor() && T0.getOS() == T1.getOS();
-
- return T0 == T1;
-}
-
-// This function returns the merged triple.
-static std::string mergeTriples(const Triple &SrcTriple,
- const Triple &DstTriple) {
- // If vendor is apple, pick the triple with the larger version number.
- if (SrcTriple.getVendor() == Triple::Apple)
- if (DstTriple.isOSVersionLT(SrcTriple))
- return SrcTriple.str();
-
- return DstTriple.str();
-}
-
Error IRLinker::run() {
// Ensure metadata materialized before value mapping.
if (SrcM->getMaterializer())
@@ -1289,14 +1268,15 @@ Error IRLinker::run() {
Triple SrcTriple(SrcM->getTargetTriple()), DstTriple(DstM.getTargetTriple());
- if (!SrcM->getTargetTriple().empty() && !triplesMatch(SrcTriple, DstTriple))
+  if (!SrcM->getTargetTriple().empty() &&
+ !SrcTriple.isCompatibleWith(DstTriple))
emitWarning("Linking two modules of different target triples: " +
SrcM->getModuleIdentifier() + "' is '" +
SrcM->getTargetTriple() + "' whereas '" +
DstM.getModuleIdentifier() + "' is '" + DstM.getTargetTriple() +
"'\n");
- DstM.setTargetTriple(mergeTriples(SrcTriple, DstTriple));
+ DstM.setTargetTriple(SrcTriple.merge(DstTriple));
// Append the module inline asm string.
if (!IsPerformingImport && !SrcM->getModuleInlineAsm().empty()) {
diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp
index 2b44c4a82d2c..116af3c917be 100644
--- a/lib/Object/Binary.cpp
+++ b/lib/Object/Binary.cpp
@@ -17,6 +17,7 @@
#include "llvm/Object/Error.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/WindowsResource.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
@@ -71,9 +72,10 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
return ObjectFile::createSymbolicFile(Buffer, Type, Context);
case sys::fs::file_magic::macho_universal_binary:
return MachOUniversalBinary::create(Buffer);
+ case sys::fs::file_magic::windows_resource:
+ return WindowsResource::createWindowsResource(Buffer);
case sys::fs::file_magic::unknown:
case sys::fs::file_magic::coff_cl_gl_object:
- case sys::fs::file_magic::windows_resource:
// Unrecognized object file format.
return errorCodeToError(object_error::invalid_file_type);
}
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 08365e71c2f6..1d08a9efd8b3 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -2,6 +2,8 @@ add_llvm_library(LLVMObject
Archive.cpp
ArchiveWriter.cpp
Binary.cpp
+ COFFImportFile.cpp
+ COFFModuleDefinition.cpp
COFFObjectFile.cpp
Decompressor.cpp
ELF.cpp
@@ -18,6 +20,7 @@ add_llvm_library(LLVMObject
SymbolicFile.cpp
SymbolSize.cpp
WasmObjectFile.cpp
+ WindowsResource.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/Object
diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp
new file mode 100644
index 000000000000..37962d84d855
--- /dev/null
+++ b/lib/Object/COFFImportFile.cpp
@@ -0,0 +1,527 @@
+//===- COFFImportFile.cpp - COFF short import file implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the writeImportLibrary function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/COFFImportFile.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/ArchiveWriter.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Path.h"
+
+#include <cstdint>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+using namespace llvm::COFF;
+using namespace llvm::object;
+using namespace llvm;
+
+namespace llvm {
+namespace object {
+
+static bool is32bit(MachineTypes Machine) {
+ switch (Machine) {
+ default:
+ llvm_unreachable("unsupported machine");
+ case IMAGE_FILE_MACHINE_AMD64:
+ return false;
+ case IMAGE_FILE_MACHINE_ARMNT:
+ case IMAGE_FILE_MACHINE_I386:
+ return true;
+ }
+}
+
+static uint16_t getImgRelRelocation(MachineTypes Machine) {
+ switch (Machine) {
+ default:
+ llvm_unreachable("unsupported machine");
+ case IMAGE_FILE_MACHINE_AMD64:
+ return IMAGE_REL_AMD64_ADDR32NB;
+ case IMAGE_FILE_MACHINE_ARMNT:
+ return IMAGE_REL_ARM_ADDR32NB;
+ case IMAGE_FILE_MACHINE_I386:
+ return IMAGE_REL_I386_DIR32NB;
+ }
+}
+
+template <class T> static void append(std::vector<uint8_t> &B, const T &Data) {
+ size_t S = B.size();
+ B.resize(S + sizeof(T));
+ memcpy(&B[S], &Data, sizeof(T));
+}
+
+static void writeStringTable(std::vector<uint8_t> &B,
+ ArrayRef<const std::string> Strings) {
+ // The COFF string table consists of a 4-byte value which is the size of the
+ // table, including the length field itself. This value is followed by the
+ // string content itself, which is an array of null-terminated C-style
+  // strings. The termination is important, as the strings are referenced by
+  // offset from the symbol entries in the file format.
+
+ size_t Pos = B.size();
+ size_t Offset = B.size();
+
+  // Skip over the length field; we will fill it in later, once the length
+  // has been computed while emitting the string content itself.
+ Pos += sizeof(uint32_t);
+
+ for (const auto &S : Strings) {
+ B.resize(Pos + S.length() + 1);
+ strcpy(reinterpret_cast<char *>(&B[Pos]), S.c_str());
+ Pos += S.length() + 1;
+ }
+
+ // Backfill the length of the table now that it has been computed.
+ support::ulittle32_t Length(B.size() - Offset);
+ support::endian::write32le(&B[Offset], Length);
+}
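
A worked example of the layout described above, with hypothetical input: for Strings = {"foo", "ba"} the table occupies 4 + 4 + 3 = 11 (0x0B) bytes:

  0B 00 00 00  66 6F 6F 00  62 61 00
  |_ length _|  f  o  o \0   b  a \0   (the length counts its own four bytes)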
+
+static ImportNameType getNameType(StringRef Sym, StringRef ExtName,
+ MachineTypes Machine) {
+ if (Sym != ExtName)
+ return IMPORT_NAME_UNDECORATE;
+ if (Machine == IMAGE_FILE_MACHINE_I386 && Sym.startswith("_"))
+ return IMPORT_NAME_NOPREFIX;
+ return IMPORT_NAME;
+}
+
+static Expected<std::string> replace(StringRef S, StringRef From,
+ StringRef To) {
+ size_t Pos = S.find(From);
+
+ // From and To may be mangled, but substrings in S may not.
+ if (Pos == StringRef::npos && From.startswith("_") && To.startswith("_")) {
+ From = From.substr(1);
+ To = To.substr(1);
+ Pos = S.find(From);
+ }
+
+ if (Pos == StringRef::npos) {
+ return make_error<StringError>(
+ StringRef(Twine(S + ": replacing '" + From +
+ "' with '" + To + "' failed").str()), object_error::parse_failed);
+ }
+
+ return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str();
+}
+
+static const std::string NullImportDescriptorSymbolName =
+ "__NULL_IMPORT_DESCRIPTOR";
+
+namespace {
+// This class constructs various small object files necessary to support linking
+// symbols imported from a DLL. The contents are pretty strictly defined and
+// nearly entirely static. The details of the structures are defined in
+// WINNT.h and the PE/COFF specification.
+class ObjectFactory {
+ using u16 = support::ulittle16_t;
+ using u32 = support::ulittle32_t;
+ MachineTypes Machine;
+ BumpPtrAllocator Alloc;
+ StringRef DLLName;
+ StringRef Library;
+ std::string ImportDescriptorSymbolName;
+ std::string NullThunkSymbolName;
+
+public:
+ ObjectFactory(StringRef S, MachineTypes M)
+ : Machine(M), DLLName(S), Library(S.drop_back(4)),
+ ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()),
+ NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {}
+
+ // Creates an Import Descriptor. This is a small object file which contains a
+ // reference to the terminators and contains the library name (entry) for the
+ // import name table. It will force the linker to construct the necessary
+ // structure to import symbols from the DLL.
+ NewArchiveMember createImportDescriptor(std::vector<uint8_t> &Buffer);
+
+  // Creates a NULL import descriptor. This is a small object file which
+ // contains a NULL import descriptor. It is used to terminate the imports
+ // from a specific DLL.
+ NewArchiveMember createNullImportDescriptor(std::vector<uint8_t> &Buffer);
+
+ // Create a NULL Thunk Entry. This is a small object file which contains a
+ // NULL Import Address Table entry and a NULL Import Lookup Table Entry. It
+ // is used to terminate the IAT and ILT.
+ NewArchiveMember createNullThunk(std::vector<uint8_t> &Buffer);
+
+ // Create a short import file which is described in PE/COFF spec 7. Import
+ // Library Format.
+ NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal,
+ ImportType Type, ImportNameType NameType);
+};
+} // namespace
+
+NewArchiveMember
+ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
+ static const uint32_t NumberOfSections = 2;
+ static const uint32_t NumberOfSymbols = 7;
+ static const uint32_t NumberOfRelocations = 3;
+
+ // COFF Header
+ coff_file_header Header{
+ u16(Machine),
+ u16(NumberOfSections),
+ u32(0),
+ u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) +
+ // .idata$2
+ sizeof(coff_import_directory_table_entry) +
+ NumberOfRelocations * sizeof(coff_relocation) +
+ // .idata$4
+ (DLLName.size() + 1)),
+ u32(NumberOfSymbols),
+ u16(0),
+ u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0),
+ };
+ append(Buffer, Header);
+
+ // Section Header Table
+ static const coff_section SectionTable[NumberOfSections] = {
+ {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'},
+ u32(0),
+ u32(0),
+ u32(sizeof(coff_import_directory_table_entry)),
+ u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)),
+ u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) +
+ sizeof(coff_import_directory_table_entry)),
+ u32(0),
+ u16(NumberOfRelocations),
+ u16(0),
+ u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA |
+ IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)},
+ {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'},
+ u32(0),
+ u32(0),
+ u32(DLLName.size() + 1),
+ u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) +
+ sizeof(coff_import_directory_table_entry) +
+ NumberOfRelocations * sizeof(coff_relocation)),
+ u32(0),
+ u32(0),
+ u16(0),
+ u16(0),
+ u32(IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA |
+ IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)},
+ };
+ append(Buffer, SectionTable);
+
+ // .idata$2
+ static const coff_import_directory_table_entry ImportDescriptor{
+ u32(0), u32(0), u32(0), u32(0), u32(0),
+ };
+ append(Buffer, ImportDescriptor);
+
+ static const coff_relocation RelocationTable[NumberOfRelocations] = {
+ {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2),
+ u16(getImgRelRelocation(Machine))},
+ {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)),
+ u32(3), u16(getImgRelRelocation(Machine))},
+ {u32(offsetof(coff_import_directory_table_entry, ImportAddressTableRVA)),
+ u32(4), u16(getImgRelRelocation(Machine))},
+ };
+ append(Buffer, RelocationTable);
+
+ // .idata$6
+ auto S = Buffer.size();
+ Buffer.resize(S + DLLName.size() + 1);
+ memcpy(&Buffer[S], DLLName.data(), DLLName.size());
+ Buffer[S + DLLName.size()] = '\0';
+
+ // Symbol Table
+ coff_symbol16 SymbolTable[NumberOfSymbols] = {
+ {{{0, 0, 0, 0, 0, 0, 0, 0}},
+ u32(0),
+ u16(1),
+ u16(0),
+ IMAGE_SYM_CLASS_EXTERNAL,
+ 0},
+ {{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}},
+ u32(0),
+ u16(1),
+ u16(0),
+ IMAGE_SYM_CLASS_SECTION,
+ 0},
+ {{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}},
+ u32(0),
+ u16(2),
+ u16(0),
+ IMAGE_SYM_CLASS_STATIC,
+ 0},
+ {{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}},
+ u32(0),
+ u16(0),
+ u16(0),
+ IMAGE_SYM_CLASS_SECTION,
+ 0},
+ {{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}},
+ u32(0),
+ u16(0),
+ u16(0),
+ IMAGE_SYM_CLASS_SECTION,
+ 0},
+ {{{0, 0, 0, 0, 0, 0, 0, 0}},
+ u32(0),
+ u16(0),
+ u16(0),
+ IMAGE_SYM_CLASS_EXTERNAL,
+ 0},
+ {{{0, 0, 0, 0, 0, 0, 0, 0}},
+ u32(0),
+ u16(0),
+ u16(0),
+ IMAGE_SYM_CLASS_EXTERNAL,
+ 0},
+ };
+ reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset =
+ sizeof(uint32_t);
+ reinterpret_cast<StringTableOffset &>(SymbolTable[5].Name).Offset =
+ sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1;
+ reinterpret_cast<StringTableOffset &>(SymbolTable[6].Name).Offset =
+ sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 +
+ NullImportDescriptorSymbolName.length() + 1;
+ append(Buffer, SymbolTable);
+
+ // String Table
+ writeStringTable(Buffer,
+ {ImportDescriptorSymbolName, NullImportDescriptorSymbolName,
+ NullThunkSymbolName});
+
+ StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()};
+ return {MemoryBufferRef(F, DLLName)};
+}
+
+NewArchiveMember
+ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) {
+ static const uint32_t NumberOfSections = 1;
+ static const uint32_t NumberOfSymbols = 1;
+
+ // COFF Header
+ coff_file_header Header{
+ u16(Machine),
+ u16(NumberOfSections),
+ u32(0),
+ u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) +
+ // .idata$3
+ sizeof(coff_import_directory_table_entry)),
+ u32(NumberOfSymbols),
+ u16(0),
+ u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0),
+ };
+ append(Buffer, Header);
+
+ // Section Header Table
+ static const coff_section SectionTable[NumberOfSections] = {
+ {{'.', 'i', 'd', 'a', 't', 'a', '$', '3'},
+ u32(0),
+ u32(0),
+ u32(sizeof(coff_import_directory_table_entry)),
+ u32(sizeof(coff_file_header) +
+ (NumberOfSections * sizeof(coff_section))),
+ u32(0),
+ u32(0),
+ u16(0),
+ u16(0),
+ u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA |
+ IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)},
+ };
+ append(Buffer, SectionTable);
+
+ // .idata$3
+ static const coff_import_directory_table_entry ImportDescriptor{
+ u32(0), u32(0), u32(0), u32(0), u32(0),
+ };
+ append(Buffer, ImportDescriptor);
+
+ // Symbol Table
+ coff_symbol16 SymbolTable[NumberOfSymbols] = {
+ {{{0, 0, 0, 0, 0, 0, 0, 0}},
+ u32(0),
+ u16(1),
+ u16(0),
+ IMAGE_SYM_CLASS_EXTERNAL,
+ 0},
+ };
+ reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset =
+ sizeof(uint32_t);
+ append(Buffer, SymbolTable);
+
+ // String Table
+ writeStringTable(Buffer, {NullImportDescriptorSymbolName});
+
+ StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()};
+ return {MemoryBufferRef(F, DLLName)};
+}
+
+NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
+ static const uint32_t NumberOfSections = 2;
+ static const uint32_t NumberOfSymbols = 1;
+ uint32_t VASize = is32bit(Machine) ? 4 : 8;
+
+ // COFF Header
+ coff_file_header Header{
+ u16(Machine),
+ u16(NumberOfSections),
+ u32(0),
+ u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) +
+ // .idata$5
+ VASize +
+ // .idata$4
+ VASize),
+ u32(NumberOfSymbols),
+ u16(0),
+ u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0),
+ };
+ append(Buffer, Header);
+
+ // Section Header Table
+ static const coff_section SectionTable[NumberOfSections] = {
+ {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'},
+ u32(0),
+ u32(0),
+ u32(VASize),
+ u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)),
+ u32(0),
+ u32(0),
+ u16(0),
+ u16(0),
+ u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES
+ : IMAGE_SCN_ALIGN_8BYTES) |
+ IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
+ IMAGE_SCN_MEM_WRITE)},
+ {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'},
+ u32(0),
+ u32(0),
+ u32(VASize),
+ u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) +
+ VASize),
+ u32(0),
+ u32(0),
+ u16(0),
+ u16(0),
+ u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES
+ : IMAGE_SCN_ALIGN_8BYTES) |
+ IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
+ IMAGE_SCN_MEM_WRITE)},
+ };
+ append(Buffer, SectionTable);
+
+ // .idata$5, ILT
+ append(Buffer, u32(0));
+ if (!is32bit(Machine))
+ append(Buffer, u32(0));
+
+ // .idata$4, IAT
+ append(Buffer, u32(0));
+ if (!is32bit(Machine))
+ append(Buffer, u32(0));
+
+ // Symbol Table
+ coff_symbol16 SymbolTable[NumberOfSymbols] = {
+ {{{0, 0, 0, 0, 0, 0, 0, 0}},
+ u32(0),
+ u16(1),
+ u16(0),
+ IMAGE_SYM_CLASS_EXTERNAL,
+ 0},
+ };
+ reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset =
+ sizeof(uint32_t);
+ append(Buffer, SymbolTable);
+
+ // String Table
+ writeStringTable(Buffer, {NullThunkSymbolName});
+
+ StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()};
+ return {MemoryBufferRef{F, DLLName}};
+}
+
+NewArchiveMember ObjectFactory::createShortImport(StringRef Sym,
+ uint16_t Ordinal,
+ ImportType ImportType,
+ ImportNameType NameType) {
+ size_t ImpSize = DLLName.size() + Sym.size() + 2; // +2 for NULs
+ size_t Size = sizeof(coff_import_header) + ImpSize;
+ char *Buf = Alloc.Allocate<char>(Size);
+ memset(Buf, 0, Size);
+ char *P = Buf;
+
+ // Write short import library.
+ auto *Imp = reinterpret_cast<coff_import_header *>(P);
+ P += sizeof(*Imp);
+ Imp->Sig2 = 0xFFFF;
+ Imp->Machine = Machine;
+ Imp->SizeOfData = ImpSize;
+ if (Ordinal > 0)
+ Imp->OrdinalHint = Ordinal;
+ Imp->TypeInfo = (NameType << 2) | ImportType;
+
+ // Write symbol name and DLL name.
+ memcpy(P, Sym.data(), Sym.size());
+ P += Sym.size() + 1;
+ memcpy(P, DLLName.data(), DLLName.size());
+
+ return {MemoryBufferRef(StringRef(Buf, Size), DLLName)};
+}
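
A worked example of the TypeInfo packing above, assuming the usual PE/COFF spec values (IMPORT_CODE = 0, IMPORT_NAME_UNDECORATE = 3): a code import by undecorated name yields TypeInfo = (3 << 2) | 0 = 0x0C.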
+
+std::error_code writeImportLibrary(StringRef DLLName, StringRef Path,
+ ArrayRef<COFFShortExport> Exports,
+ MachineTypes Machine) {
+
+ std::vector<NewArchiveMember> Members;
+ ObjectFactory OF(llvm::sys::path::filename(DLLName), Machine);
+
+ std::vector<uint8_t> ImportDescriptor;
+ Members.push_back(OF.createImportDescriptor(ImportDescriptor));
+
+ std::vector<uint8_t> NullImportDescriptor;
+ Members.push_back(OF.createNullImportDescriptor(NullImportDescriptor));
+
+ std::vector<uint8_t> NullThunk;
+ Members.push_back(OF.createNullThunk(NullThunk));
+
+ for (COFFShortExport E : Exports) {
+ if (E.Private)
+ continue;
+
+ ImportType ImportType = IMPORT_CODE;
+ if (E.Data)
+ ImportType = IMPORT_DATA;
+ if (E.Constant)
+ ImportType = IMPORT_CONST;
+
+ StringRef SymbolName = E.isWeak() ? E.ExtName : E.Name;
+ ImportNameType NameType = getNameType(SymbolName, E.Name, Machine);
+ Expected<std::string> Name = E.ExtName.empty()
+ ? SymbolName
+ : replace(SymbolName, E.Name, E.ExtName);
+
+ if (!Name) {
+ return errorToErrorCode(Name.takeError());
+ }
+
+ Members.push_back(
+ OF.createShortImport(*Name, E.Ordinal, ImportType, NameType));
+ }
+
+ std::pair<StringRef, std::error_code> Result =
+ writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU,
+ /*Deterministic*/ true, /*Thin*/ false);
+
+ return Result.second;
+}
+
+} // namespace object
+} // namespace llvm
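
A hedged usage sketch of the new entry point; the DLL name, output path, and export list are invented for illustration:

#include "llvm/Object/COFFImportFile.h"
#include <vector>

using namespace llvm;
using namespace llvm::object;

std::error_code makeImportLib() {
  COFFShortExport E;
  E.Name = "my_function"; // hypothetical exported symbol

  std::vector<COFFShortExport> Exports = {E};
  // Writes mylib.lib containing the three linker-support members plus one
  // short-import member per non-private export.
  return writeImportLibrary("mylib.dll", "mylib.lib", Exports,
                            COFF::IMAGE_FILE_MACHINE_AMD64);
}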
diff --git a/lib/Object/COFFModuleDefinition.cpp b/lib/Object/COFFModuleDefinition.cpp
new file mode 100644
index 000000000000..0d69cb6b709c
--- /dev/null
+++ b/lib/Object/COFFModuleDefinition.cpp
@@ -0,0 +1,319 @@
+//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Windows-specific.
+// A parser for the module-definition file (.def file).
+//
+// The format of module-definition files are described in this document:
+// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/COFFModuleDefinition.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/COFFImportFile.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm::COFF;
+using namespace llvm;
+
+namespace llvm {
+namespace object {
+
+enum Kind {
+ Unknown,
+ Eof,
+ Identifier,
+ Comma,
+ Equal,
+ KwBase,
+ KwConstant,
+ KwData,
+ KwExports,
+ KwHeapsize,
+ KwLibrary,
+ KwName,
+ KwNoname,
+ KwPrivate,
+ KwStacksize,
+ KwVersion,
+};
+
+struct Token {
+ explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
+ Kind K;
+ StringRef Value;
+};
+
+static bool isDecorated(StringRef Sym) {
+ return Sym.startswith("_") || Sym.startswith("@") || Sym.startswith("?");
+}
+
+static Error createError(const Twine &Err) {
+ return make_error<StringError>(StringRef(Err.str()),
+ object_error::parse_failed);
+}
+
+class Lexer {
+public:
+ Lexer(StringRef S) : Buf(S) {}
+
+ Token lex() {
+ Buf = Buf.trim();
+ if (Buf.empty())
+ return Token(Eof);
+
+ switch (Buf[0]) {
+ case '\0':
+ return Token(Eof);
+ case ';': {
+ size_t End = Buf.find('\n');
+ Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
+ return lex();
+ }
+ case '=':
+ Buf = Buf.drop_front();
+ return Token(Equal, "=");
+ case ',':
+ Buf = Buf.drop_front();
+ return Token(Comma, ",");
+ case '"': {
+ StringRef S;
+ std::tie(S, Buf) = Buf.substr(1).split('"');
+ return Token(Identifier, S);
+ }
+ default: {
+ size_t End = Buf.find_first_of("=,\r\n \t\v");
+ StringRef Word = Buf.substr(0, End);
+ Kind K = llvm::StringSwitch<Kind>(Word)
+ .Case("BASE", KwBase)
+ .Case("CONSTANT", KwConstant)
+ .Case("DATA", KwData)
+ .Case("EXPORTS", KwExports)
+ .Case("HEAPSIZE", KwHeapsize)
+ .Case("LIBRARY", KwLibrary)
+ .Case("NAME", KwName)
+ .Case("NONAME", KwNoname)
+ .Case("PRIVATE", KwPrivate)
+ .Case("STACKSIZE", KwStacksize)
+ .Case("VERSION", KwVersion)
+ .Default(Identifier);
+ Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
+ return Token(K, Word);
+ }
+ }
+ }
+
+private:
+ StringRef Buf;
+};
+
+class Parser {
+public:
+ explicit Parser(StringRef S, MachineTypes M) : Lex(S), Machine(M) {}
+
+ Expected<COFFModuleDefinition> parse() {
+ do {
+ if (Error Err = parseOne())
+ return std::move(Err);
+ } while (Tok.K != Eof);
+ return Info;
+ }
+
+private:
+ void read() {
+ if (Stack.empty()) {
+ Tok = Lex.lex();
+ return;
+ }
+ Tok = Stack.back();
+ Stack.pop_back();
+ }
+
+ Error readAsInt(uint64_t *I) {
+ read();
+ if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
+ return createError("integer expected");
+ return Error::success();
+ }
+
+ Error expect(Kind Expected, StringRef Msg) {
+ read();
+ if (Tok.K != Expected)
+ return createError(Msg);
+ return Error::success();
+ }
+
+ void unget() { Stack.push_back(Tok); }
+
+ Error parseOne() {
+ read();
+ switch (Tok.K) {
+ case Eof:
+ return Error::success();
+ case KwExports:
+ for (;;) {
+ read();
+ if (Tok.K != Identifier) {
+ unget();
+ return Error::success();
+ }
+ if (Error Err = parseExport())
+ return Err;
+ }
+ case KwHeapsize:
+ return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
+ case KwStacksize:
+ return parseNumbers(&Info.StackReserve, &Info.StackCommit);
+ case KwLibrary:
+ case KwName: {
+ bool IsDll = Tok.K == KwLibrary; // Check before parseName.
+ std::string Name;
+ if (Error Err = parseName(&Name, &Info.ImageBase))
+ return Err;
+ // Append the appropriate file extension if not already present.
+ StringRef Ext = IsDll ? ".dll" : ".exe";
+ if (!StringRef(Name).endswith_lower(Ext))
+ Name += Ext;
+
+ // Set the output file, but don't override /out if it was already passed.
+ if (Info.OutputFile.empty())
+ Info.OutputFile = Name;
+ return Error::success();
+ }
+ case KwVersion:
+ return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
+ default:
+ return createError("unknown directive: " + Tok.Value);
+ }
+ }
+
+ Error parseExport() {
+ COFFShortExport E;
+ E.Name = Tok.Value;
+ read();
+ if (Tok.K == Equal) {
+ read();
+ if (Tok.K != Identifier)
+ return createError("identifier expected, but got " + Tok.Value);
+ E.ExtName = E.Name;
+ E.Name = Tok.Value;
+ } else {
+ unget();
+ }
+
+ if (Machine == IMAGE_FILE_MACHINE_I386) {
+ if (!isDecorated(E.Name))
+ E.Name = (std::string("_").append(E.Name));
+ if (!E.ExtName.empty() && !isDecorated(E.ExtName))
+ E.ExtName = (std::string("_").append(E.ExtName));
+ }
+
+ for (;;) {
+ read();
+ if (Tok.K == Identifier && Tok.Value[0] == '@') {
+ Tok.Value.drop_front().getAsInteger(10, E.Ordinal);
+ read();
+ if (Tok.K == KwNoname) {
+ E.Noname = true;
+ } else {
+ unget();
+ }
+ continue;
+ }
+ if (Tok.K == KwData) {
+ E.Data = true;
+ continue;
+ }
+ if (Tok.K == KwConstant) {
+ E.Constant = true;
+ continue;
+ }
+ if (Tok.K == KwPrivate) {
+ E.Private = true;
+ continue;
+ }
+ unget();
+ Info.Exports.push_back(E);
+ return Error::success();
+ }
+ }
+
+ // HEAPSIZE/STACKSIZE reserve[,commit]
+ Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
+ if (Error Err = readAsInt(Reserve))
+ return Err;
+ read();
+ if (Tok.K != Comma) {
+ unget();
+ Commit = nullptr;
+ return Error::success();
+ }
+ if (Error Err = readAsInt(Commit))
+ return Err;
+ return Error::success();
+ }
+
+ // NAME outputPath [BASE=address]
+ Error parseName(std::string *Out, uint64_t *Baseaddr) {
+ read();
+ if (Tok.K == Identifier) {
+ *Out = Tok.Value;
+ } else {
+ *Out = "";
+ unget();
+ return Error::success();
+ }
+ read();
+ if (Tok.K == KwBase) {
+ if (Error Err = expect(Equal, "'=' expected"))
+ return Err;
+ if (Error Err = readAsInt(Baseaddr))
+ return Err;
+ } else {
+ unget();
+ *Baseaddr = 0;
+ }
+ return Error::success();
+ }
+
+ // VERSION major[.minor]
+ Error parseVersion(uint32_t *Major, uint32_t *Minor) {
+ read();
+ if (Tok.K != Identifier)
+ return createError("identifier expected, but got " + Tok.Value);
+ StringRef V1, V2;
+ std::tie(V1, V2) = Tok.Value.split('.');
+ if (V1.getAsInteger(10, *Major))
+ return createError("integer expected, but got " + Tok.Value);
+ if (V2.empty())
+ *Minor = 0;
+ else if (V2.getAsInteger(10, *Minor))
+ return createError("integer expected, but got " + Tok.Value);
+ return Error::success();
+ }
+
+ Lexer Lex;
+ Token Tok;
+ std::vector<Token> Stack;
+ MachineTypes Machine;
+ COFFModuleDefinition Info;
+};
+
+Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
+ MachineTypes Machine) {
+ return Parser(MB.getBuffer(), Machine).parse();
+}
+
+} // namespace object
+} // namespace llvm
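
A hedged sketch of driving the parser over an in-memory .def file; the contents are hypothetical but exercise the LIBRARY and EXPORTS directives handled above:

#include "llvm/Object/COFFModuleDefinition.h"
#include "llvm/Support/MemoryBuffer.h"

using namespace llvm;
using namespace llvm::object;

Expected<COFFModuleDefinition> parseExample() {
  StringRef Def = "LIBRARY mylib.dll\n"
                  "EXPORTS\n"
                  "  my_function @1\n" // exported by name, ordinal hint 1
                  "  my_table DATA\n"; // data export
  auto MB = MemoryBuffer::getMemBuffer(Def, "example.def");
  return parseCOFFModuleDefinition(MB->getMemBufferRef(),
                                   COFF::IMAGE_FILE_MACHINE_AMD64);
}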
diff --git a/lib/Object/Decompressor.cpp b/lib/Object/Decompressor.cpp
index 0be602b1fc1a..89d199a3f3f6 100644
--- a/lib/Object/Decompressor.cpp
+++ b/lib/Object/Decompressor.cpp
@@ -88,11 +88,6 @@ bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) {
return (Flags & ELF::SHF_COMPRESSED) || isGnuStyle(Name);
}
-Error Decompressor::decompress(SmallString<32> &Out) {
- Out.resize(DecompressedSize);
- return decompress({Out.data(), (size_t)DecompressedSize});
-}
-
Error Decompressor::decompress(MutableArrayRef<char> Buffer) {
size_t Size = Buffer.size();
return zlib::uncompress(SectionData, Buffer.data(), Size);
diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp
new file mode 100644
index 000000000000..b52563469094
--- /dev/null
+++ b/lib/Object/WindowsResource.cpp
@@ -0,0 +1,90 @@
+//===-- WindowsResource.cpp -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the .res file class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/WindowsResource.h"
+#include "llvm/Object/Error.h"
+#include <system_error>
+
+namespace llvm {
+namespace object {
+
+static const size_t ResourceMagicSize = 16;
+
+static const size_t NullEntrySize = 16;
+
+#define RETURN_IF_ERROR(X) \
+ if (auto EC = X) \
+ return EC;
+
+WindowsResource::WindowsResource(MemoryBufferRef Source)
+ : Binary(Binary::ID_WinRes, Source) {
+ size_t LeadingSize = ResourceMagicSize + NullEntrySize;
+ BBS = BinaryByteStream(Data.getBuffer().drop_front(LeadingSize),
+ support::little);
+}
+
+WindowsResource::~WindowsResource() = default;
+
+Expected<std::unique_ptr<WindowsResource>>
+WindowsResource::createWindowsResource(MemoryBufferRef Source) {
+ if (Source.getBufferSize() < ResourceMagicSize + NullEntrySize)
+ return make_error<GenericBinaryError>(
+ "File too small to be a resource file",
+ object_error::invalid_file_type);
+ std::unique_ptr<WindowsResource> Ret(new WindowsResource(Source));
+ return std::move(Ret);
+}
+
+Expected<ResourceEntryRef> WindowsResource::getHeadEntry() {
+ Error Err = Error::success();
+ auto Ref = ResourceEntryRef(BinaryStreamRef(BBS), this, Err);
+ if (Err)
+ return std::move(Err);
+ return Ref;
+}
+
+ResourceEntryRef::ResourceEntryRef(BinaryStreamRef Ref,
+ const WindowsResource *Owner, Error &Err)
+ : Reader(Ref), OwningRes(Owner) {
+ if (loadNext())
+ Err = make_error<GenericBinaryError>("Could not read first entry.",
+ object_error::unexpected_eof);
+}
+
+Error ResourceEntryRef::moveNext(bool &End) {
+ // Reached end of all the entries.
+ if (Reader.bytesRemaining() == 0) {
+ End = true;
+ return Error::success();
+ }
+ RETURN_IF_ERROR(loadNext());
+
+ return Error::success();
+}
+
+Error ResourceEntryRef::loadNext() {
+ uint32_t DataSize;
+ RETURN_IF_ERROR(Reader.readInteger(DataSize));
+ uint32_t HeaderSize;
+ RETURN_IF_ERROR(Reader.readInteger(HeaderSize));
+ // The data and header size ints are themselves part of the header, so we must
+ // subtract them from the size.
+ RETURN_IF_ERROR(
+ Reader.readStreamRef(HeaderBytes, HeaderSize - 2 * sizeof(uint32_t)));
+ RETURN_IF_ERROR(Reader.readStreamRef(DataBytes, DataSize));
+ RETURN_IF_ERROR(Reader.padToAlignment(sizeof(uint32_t)));
+ return Error::success();
+}
+
+} // namespace object
+} // namespace llvm
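
A hedged sketch of iterating a .res file with the new class; the function name is hypothetical, the calls mirror the API defined above:

#include "llvm/Object/WindowsResource.h"
#include "llvm/Support/MemoryBuffer.h"

using namespace llvm;
using namespace llvm::object;

static Error walkEntries(MemoryBufferRef MB) {
  auto ResOrErr = WindowsResource::createWindowsResource(MB);
  if (!ResOrErr)
    return ResOrErr.takeError();
  auto EntryOrErr = (*ResOrErr)->getHeadEntry();
  if (!EntryOrErr)
    return EntryOrErr.takeError();
  ResourceEntryRef Entry = *EntryOrErr;
  bool End = false;
  while (!End) // moveNext() sets End once no bytes remain
    if (Error Err = Entry.moveNext(End))
      return Err;
  return Error::success();
}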
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 7076e751071d..6ece7965ce64 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -150,6 +150,10 @@ using namespace llvm;
static cl::opt<unsigned> MaxDevirtIterations("pm-max-devirt-iterations",
cl::ReallyHidden, cl::init(4));
+static cl::opt<bool>
+ RunPartialInlining("enable-npm-partial-inlining", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore,
+                       cl::desc("Run Partial inlining pass"));
static cl::opt<bool> EnableGVNHoist(
"enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
@@ -552,6 +556,11 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
// At this point, we expect to have canonical and simple IR which we begin
// *optimizing* for efficient execution going forward.
+ // Run partial inlining pass to partially inline functions that have
+ // large bodies.
+ if (RunPartialInlining)
+ MPM.addPass(PartialInlinerPass());
+
// Eliminate externally available functions now that inlining is over -- we
// won't emit these anyways.
MPM.addPass(EliminateAvailableExternallyPass());
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 17144522db82..2a916b14bc22 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -1398,8 +1398,8 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
DEBUG(dbgs() << '\n');
}
-void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
- unsigned rhsWords, APInt *Quotient, APInt *Remainder) {
+void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
+ unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
assert(lhsWords >= rhsWords && "Fractional result");
// First, compose the values into an array of 32-bit words instead of
@@ -1436,7 +1436,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
// Initialize the dividend
memset(U, 0, (m+n+1)*sizeof(uint32_t));
for (unsigned i = 0; i < lhsWords; ++i) {
- uint64_t tmp = LHS.getRawData()[i];
+ uint64_t tmp = LHS[i];
U[i * 2] = Lo_32(tmp);
U[i * 2 + 1] = Hi_32(tmp);
}
@@ -1445,7 +1445,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
// Initialize the divisor
memset(V, 0, (n)*sizeof(uint32_t));
for (unsigned i = 0; i < rhsWords; ++i) {
- uint64_t tmp = RHS.getRawData()[i];
+ uint64_t tmp = RHS[i];
V[i * 2] = Lo_32(tmp);
V[i * 2 + 1] = Hi_32(tmp);
}
@@ -1502,48 +1502,14 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
// If the caller wants the quotient
if (Quotient) {
- // Set up the Quotient value's memory.
- Quotient->reallocate(LHS.BitWidth);
- // Clear out any previous bits.
- Quotient->clearAllBits();
-
- // The quotient is in Q. Reconstitute the quotient into Quotient's low
- // order words.
- // This case is currently dead as all users of divide() handle trivial cases
- // earlier.
- if (lhsWords == 1) {
- uint64_t tmp = Make_64(Q[1], Q[0]);
- if (Quotient->isSingleWord())
- Quotient->U.VAL = tmp;
- else
- Quotient->U.pVal[0] = tmp;
- } else {
- assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
- for (unsigned i = 0; i < lhsWords; ++i)
- Quotient->U.pVal[i] = Make_64(Q[i*2+1], Q[i*2]);
- }
+ for (unsigned i = 0; i < lhsWords; ++i)
+ Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
}
// If the caller wants the remainder
if (Remainder) {
- // Set up the Remainder value's memory.
- Remainder->reallocate(RHS.BitWidth);
- // Clear out any previous bits.
- Remainder->clearAllBits();
-
- // The remainder is in R. Reconstitute the remainder into Remainder's low
- // order words.
- if (rhsWords == 1) {
- uint64_t tmp = Make_64(R[1], R[0]);
- if (Remainder->isSingleWord())
- Remainder->U.VAL = tmp;
- else
- Remainder->U.pVal[0] = tmp;
- } else {
- assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
- for (unsigned i = 0; i < rhsWords; ++i)
- Remainder->U.pVal[i] = Make_64(R[i*2+1], R[i*2]);
- }
+ for (unsigned i = 0; i < rhsWords; ++i)
+ Remainder[i] = Make_64(R[i*2+1], R[i*2]);
}
// Clean up the memory we allocated.
@@ -1555,7 +1521,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
}
}
-APInt APInt::udiv(const APInt& RHS) const {
+APInt APInt::udiv(const APInt &RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
// First, deal with the easy case
@@ -1588,8 +1554,41 @@ APInt APInt::udiv(const APInt& RHS) const {
return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
// We have to compute it the hard way. Invoke the Knuth divide algorithm.
- APInt Quotient; // to hold result.
- divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr);
+ APInt Quotient(BitWidth, 0); // to hold result.
+ divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
+ return Quotient;
+}
+
+APInt APInt::udiv(uint64_t RHS) const {
+ assert(RHS != 0 && "Divide by zero?");
+
+ // First, deal with the easy case
+ if (isSingleWord())
+ return APInt(BitWidth, U.VAL / RHS);
+
+ // Get some facts about the LHS words.
+ unsigned lhsWords = getNumWords(getActiveBits());
+
+ // Deal with some degenerate cases
+ if (!lhsWords)
+ // 0 / X ===> 0
+ return APInt(BitWidth, 0);
+ if (RHS == 1)
+ // X / 1 ===> X
+ return *this;
+ if (this->ult(RHS))
+ // X / Y ===> 0, iff X < Y
+ return APInt(BitWidth, 0);
+ if (*this == RHS)
+ // X / X ===> 1
+ return APInt(BitWidth, 1);
+ if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
+ // All high words are zero, just use native divide
+ return APInt(BitWidth, this->U.pVal[0] / RHS);
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ APInt Quotient(BitWidth, 0); // to hold result.
+ divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
return Quotient;
}
@@ -1604,7 +1603,18 @@ APInt APInt::sdiv(const APInt &RHS) const {
return this->udiv(RHS);
}
-APInt APInt::urem(const APInt& RHS) const {
+APInt APInt::sdiv(int64_t RHS) const {
+ if (isNegative()) {
+ if (RHS < 0)
+ return (-(*this)).udiv(-RHS);
+ return -((-(*this)).udiv(RHS));
+ }
+ if (RHS < 0)
+ return -(this->udiv(-RHS));
+ return this->udiv(RHS);
+}
+
+APInt APInt::urem(const APInt &RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord()) {
assert(RHS.U.VAL != 0 && "Remainder by zero?");
@@ -1637,8 +1647,40 @@ APInt APInt::urem(const APInt& RHS) const {
return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
// We have to compute it the hard way. Invoke the Knuth divide algorithm.
- APInt Remainder;
- divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder);
+ APInt Remainder(BitWidth, 0);
+ divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
+ return Remainder;
+}
+
+uint64_t APInt::urem(uint64_t RHS) const {
+ assert(RHS != 0 && "Remainder by zero?");
+
+ if (isSingleWord())
+ return U.VAL % RHS;
+
+ // Get some facts about the LHS
+ unsigned lhsWords = getNumWords(getActiveBits());
+
+ // Check the degenerate cases
+ if (lhsWords == 0)
+ // 0 % Y ===> 0
+ return 0;
+ if (RHS == 1)
+ // X % 1 ===> 0
+ return 0;
+ if (this->ult(RHS))
+ // X % Y ===> X, iff X < Y
+ return getZExtValue();
+ if (*this == RHS)
+ // X % X == 0;
+ return 0;
+ if (lhsWords == 1)
+ // All high words are zero, just use native remainder
+ return U.pVal[0] % RHS;
+
+ // We have to compute it the hard way. Invoke the Knuth divide algorithm.
+ uint64_t Remainder;
+ divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
return Remainder;
}
@@ -1653,6 +1695,17 @@ APInt APInt::srem(const APInt &RHS) const {
return this->urem(RHS);
}
+int64_t APInt::srem(int64_t RHS) const {
+ if (isNegative()) {
+ if (RHS < 0)
+ return -((-(*this)).urem(-RHS));
+ return -((-(*this)).urem(RHS));
+ }
+ if (RHS < 0)
+ return this->urem(-RHS);
+ return this->urem(RHS);
+}
+
void APInt::udivrem(const APInt &LHS, const APInt &RHS,
APInt &Quotient, APInt &Remainder) {
assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
@@ -1698,20 +1751,90 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS,
return;
}
+ // Make sure there is enough space to hold the results.
+ // NOTE: This assumes that reallocate won't affect any bits if it doesn't
+ // change the size. This is necessary if Quotient or Remainder is aliased
+ // with LHS or RHS.
+ Quotient.reallocate(BitWidth);
+ Remainder.reallocate(BitWidth);
+
if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
// There is only one word to consider so use the native versions.
uint64_t lhsValue = LHS.U.pVal[0];
uint64_t rhsValue = RHS.U.pVal[0];
- // Make sure there is enough space to hold the results.
- Quotient.reallocate(BitWidth);
- Remainder.reallocate(BitWidth);
Quotient = lhsValue / rhsValue;
Remainder = lhsValue % rhsValue;
return;
}
// Okay, lets do it the long way
- divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
+ divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
+ Remainder.U.pVal);
+ // Clear the rest of the Quotient and Remainder.
+ std::memset(Quotient.U.pVal + lhsWords, 0,
+ (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
+ std::memset(Remainder.U.pVal + rhsWords, 0,
+ (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
+}
+
+void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
+ uint64_t &Remainder) {
+ assert(RHS != 0 && "Divide by zero?");
+ unsigned BitWidth = LHS.BitWidth;
+
+ // First, deal with the easy case
+ if (LHS.isSingleWord()) {
+ uint64_t QuotVal = LHS.U.VAL / RHS;
+ Remainder = LHS.U.VAL % RHS;
+ Quotient = APInt(BitWidth, QuotVal);
+ return;
+ }
+
+ // Get some size facts about the dividend and divisor
+ unsigned lhsWords = getNumWords(LHS.getActiveBits());
+
+ // Check the degenerate cases
+ if (lhsWords == 0) {
+ Quotient = 0; // 0 / Y ===> 0
+ Remainder = 0; // 0 % Y ===> 0
+ return;
+ }
+
+  if (RHS == 1) {
+    Quotient = LHS; // X / 1 ===> X
+    Remainder = 0;  // X % 1 ===> 0
+    return;
+  }
+
+ if (LHS.ult(RHS)) {
+ Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y
+ Quotient = 0; // X / Y ===> 0, iff X < Y
+ return;
+ }
+
+ if (LHS == RHS) {
+ Quotient = 1; // X / X ===> 1
+ Remainder = 0; // X % X ===> 0;
+ return;
+ }
+
+ // Make sure there is enough space to hold the results.
+ // NOTE: This assumes that reallocate won't affect any bits if it doesn't
+ // change the size. This is necessary if Quotient is aliased with LHS.
+ Quotient.reallocate(BitWidth);
+
+ if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
+ // There is only one word to consider so use the native versions.
+ uint64_t lhsValue = LHS.U.pVal[0];
+ Quotient = lhsValue / RHS;
+ Remainder = lhsValue % RHS;
+ return;
+ }
+
+ // Okay, lets do it the long way
+ divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
+ // Clear the rest of the Quotient.
+ std::memset(Quotient.U.pVal + lhsWords, 0,
+ (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
}
void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
@@ -1732,6 +1855,26 @@ void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
}
}
+void APInt::sdivrem(const APInt &LHS, int64_t RHS,
+ APInt &Quotient, int64_t &Remainder) {
+ uint64_t R = Remainder;
+ if (LHS.isNegative()) {
+ if (RHS < 0)
+ APInt::udivrem(-LHS, -RHS, Quotient, R);
+ else {
+ APInt::udivrem(-LHS, RHS, Quotient, R);
+ Quotient.negate();
+ }
+ R = -R;
+ } else if (RHS < 0) {
+ APInt::udivrem(LHS, -RHS, Quotient, R);
+ Quotient.negate();
+ } else {
+ APInt::udivrem(LHS, RHS, Quotient, R);
+ }
+ Remainder = R;
+}
+
APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
APInt Res = *this+RHS;
Overflow = isNonNegative() == RHS.isNonNegative() &&
@@ -1962,11 +2105,9 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
Tmp.lshrInPlace(ShiftAmt);
}
} else {
- APInt divisor(Tmp.getBitWidth(), Radix);
- APInt APdigit;
while (Tmp.getBoolValue()) {
- udivrem(Tmp, divisor, Tmp, APdigit);
- unsigned Digit = (unsigned)APdigit.getZExtValue();
+ uint64_t Digit;
+ udivrem(Tmp, Radix, Tmp, Digit);
assert(Digit < Radix && "divide failed");
Str.push_back(Digits[Digit]);
}
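
A hedged sketch of the new word-sized udivrem overload, mirroring the toString() change above; it also relies on the quotient being allowed to alias the dividend, which the reallocate note in udivrem guarantees:

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

// Collect decimal digits (least-significant first) without allocating a
// temporary APInt divisor on every iteration.
static void decimalDigits(APInt Tmp, SmallVectorImpl<char> &Out) {
  while (Tmp.getBoolValue()) {
    uint64_t Digit;
    APInt::udivrem(Tmp, 10, Tmp, Digit); // Quotient aliases LHS here
    Out.push_back('0' + static_cast<char>(Digit)); // Digit < 10
  }
}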
diff --git a/lib/Support/BinaryStreamReader.cpp b/lib/Support/BinaryStreamReader.cpp
index 702d98770e05..5c277448a765 100644
--- a/lib/Support/BinaryStreamReader.cpp
+++ b/lib/Support/BinaryStreamReader.cpp
@@ -13,9 +13,18 @@
#include "llvm/Support/BinaryStreamRef.h"
using namespace llvm;
+using endianness = llvm::support::endianness;
-BinaryStreamReader::BinaryStreamReader(BinaryStreamRef S)
- : Stream(S), Offset(0) {}
+BinaryStreamReader::BinaryStreamReader(BinaryStreamRef Ref) : Stream(Ref) {}
+
+BinaryStreamReader::BinaryStreamReader(BinaryStream &Stream) : Stream(Stream) {}
+
+BinaryStreamReader::BinaryStreamReader(ArrayRef<uint8_t> Data,
+ endianness Endian)
+ : Stream(Data, Endian) {}
+
+BinaryStreamReader::BinaryStreamReader(StringRef Data, endianness Endian)
+ : Stream(Data, Endian) {}
Error BinaryStreamReader::readLongestContiguousChunk(
ArrayRef<uint8_t> &Buffer) {
@@ -86,6 +95,11 @@ Error BinaryStreamReader::skip(uint32_t Amount) {
return Error::success();
}
+Error BinaryStreamReader::padToAlignment(uint32_t Align) {
+ uint32_t NewOffset = alignTo(Offset, Align);
+ return skip(NewOffset - Offset);
+}
+
uint8_t BinaryStreamReader::peek() const {
ArrayRef<uint8_t> Buffer;
auto EC = Stream.readBytes(Offset, 1, Buffer);
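
A hedged sketch of the new convenience constructor together with padToAlignment; the record layout being read is invented:

#include "llvm/Support/BinaryStreamReader.h"

using namespace llvm;

static Error readAligned(ArrayRef<uint8_t> Bytes) {
  // Wraps the raw bytes directly; no separate BinaryByteStream has to be
  // constructed and kept alive by the caller.
  BinaryStreamReader Reader(Bytes, support::little);
  uint32_t Size;
  if (auto EC = Reader.readInteger(Size))
    return EC;
  return Reader.padToAlignment(4); // skip to the next 4-byte boundary
}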
diff --git a/lib/Support/BinaryStreamRef.cpp b/lib/Support/BinaryStreamRef.cpp
new file mode 100644
index 000000000000..fe9a8171e146
--- /dev/null
+++ b/lib/Support/BinaryStreamRef.cpp
@@ -0,0 +1,137 @@
+//===- BinaryStreamRef.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/BinaryByteStream.h"
+
+using namespace llvm;
+using namespace llvm::support;
+
+namespace {
+
+class ArrayRefImpl : public BinaryStream {
+public:
+ ArrayRefImpl(ArrayRef<uint8_t> Data, endianness Endian) : BBS(Data, Endian) {}
+
+ llvm::support::endianness getEndian() const override {
+ return BBS.getEndian();
+ }
+ Error readBytes(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) override {
+ return BBS.readBytes(Offset, Size, Buffer);
+ }
+ Error readLongestContiguousChunk(uint32_t Offset,
+ ArrayRef<uint8_t> &Buffer) override {
+ return BBS.readLongestContiguousChunk(Offset, Buffer);
+ }
+ uint32_t getLength() override { return BBS.getLength(); }
+
+private:
+ BinaryByteStream BBS;
+};
+
+class MutableArrayRefImpl : public WritableBinaryStream {
+public:
+ MutableArrayRefImpl(MutableArrayRef<uint8_t> Data, endianness Endian)
+ : BBS(Data, Endian) {}
+
+ // Inherited via WritableBinaryStream
+ llvm::support::endianness getEndian() const override {
+ return BBS.getEndian();
+ }
+ Error readBytes(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) override {
+ return BBS.readBytes(Offset, Size, Buffer);
+ }
+ Error readLongestContiguousChunk(uint32_t Offset,
+ ArrayRef<uint8_t> &Buffer) override {
+ return BBS.readLongestContiguousChunk(Offset, Buffer);
+ }
+ uint32_t getLength() override { return BBS.getLength(); }
+
+ Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) override {
+ return BBS.writeBytes(Offset, Data);
+ }
+ Error commit() override { return BBS.commit(); }
+
+private:
+ MutableBinaryByteStream BBS;
+};
+}
+
+BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream)
+ : BinaryStreamRef(Stream, 0, Stream.getLength()) {}
+BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream, uint32_t Offset,
+ uint32_t Length)
+ : BinaryStreamRefBase(Stream, Offset, Length) {}
+BinaryStreamRef::BinaryStreamRef(ArrayRef<uint8_t> Data, endianness Endian)
+ : BinaryStreamRefBase(std::make_shared<ArrayRefImpl>(Data, Endian), 0,
+ Data.size()) {}
+BinaryStreamRef::BinaryStreamRef(StringRef Data, endianness Endian)
+ : BinaryStreamRef(makeArrayRef(Data.bytes_begin(), Data.bytes_end()),
+ Endian) {}
+
+BinaryStreamRef::BinaryStreamRef(const BinaryStreamRef &Other)
+ : BinaryStreamRefBase(Other) {}
+
+Error BinaryStreamRef::readBytes(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) const {
+ if (auto EC = checkOffset(Offset, Size))
+ return EC;
+ return BorrowedImpl->readBytes(ViewOffset + Offset, Size, Buffer);
+}
+
+Error BinaryStreamRef::readLongestContiguousChunk(
+ uint32_t Offset, ArrayRef<uint8_t> &Buffer) const {
+ if (auto EC = checkOffset(Offset, 1))
+ return EC;
+
+ if (auto EC =
+ BorrowedImpl->readLongestContiguousChunk(ViewOffset + Offset, Buffer))
+ return EC;
+ // This StreamRef might refer to a smaller window over a larger stream. In
+ // that case we will have read out more bytes than we should return, because
+ // we should not read past the end of the current view.
+ uint32_t MaxLength = Length - Offset;
+ if (Buffer.size() > MaxLength)
+ Buffer = Buffer.slice(0, MaxLength);
+ return Error::success();
+}
+
+WritableBinaryStreamRef::WritableBinaryStreamRef(WritableBinaryStream &Stream)
+ : WritableBinaryStreamRef(Stream, 0, Stream.getLength()) {}
+
+WritableBinaryStreamRef::WritableBinaryStreamRef(WritableBinaryStream &Stream,
+ uint32_t Offset,
+ uint32_t Length)
+ : BinaryStreamRefBase(Stream, Offset, Length) {}
+
+WritableBinaryStreamRef::WritableBinaryStreamRef(MutableArrayRef<uint8_t> Data,
+ endianness Endian)
+ : BinaryStreamRefBase(std::make_shared<MutableArrayRefImpl>(Data, Endian),
+ 0, Data.size()) {}
+
+WritableBinaryStreamRef::WritableBinaryStreamRef(
+ const WritableBinaryStreamRef &Other)
+ : BinaryStreamRefBase(Other) {}
+
+Error WritableBinaryStreamRef::writeBytes(uint32_t Offset,
+ ArrayRef<uint8_t> Data) const {
+ if (auto EC = checkOffset(Offset, Data.size()))
+ return EC;
+
+ return BorrowedImpl->writeBytes(ViewOffset + Offset, Data);
+}
+
+WritableBinaryStreamRef::operator BinaryStreamRef() const {
+ return BinaryStreamRef(*BorrowedImpl, ViewOffset, Length);
+}
+
+/// \brief For buffered streams, commits changes to the backing store.
+Error WritableBinaryStreamRef::commit() { return BorrowedImpl->commit(); }
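
A hedged sketch of what the ArrayRefImpl adapter above enables: a BinaryStreamRef constructed straight over raw memory, sharing ownership of the small wrapper stream:

#include "llvm/Support/BinaryStreamRef.h"

using namespace llvm;

static Error firstFourBytes(ArrayRef<uint8_t> Bytes) {
  BinaryStreamRef Ref(Bytes, support::little);
  ArrayRef<uint8_t> Out;
  return Ref.readBytes(0, 4, Out); // bounds-checked against the view length
}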
diff --git a/lib/Support/BinaryStreamWriter.cpp b/lib/Support/BinaryStreamWriter.cpp
index d78dbc68f593..b22eb1ed12d0 100644
--- a/lib/Support/BinaryStreamWriter.cpp
+++ b/lib/Support/BinaryStreamWriter.cpp
@@ -15,8 +15,15 @@
using namespace llvm;
-BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStreamRef S)
- : Stream(S), Offset(0) {}
+BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStreamRef Ref)
+ : Stream(Ref) {}
+
+BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStream &Stream)
+ : Stream(Stream) {}
+
+BinaryStreamWriter::BinaryStreamWriter(MutableArrayRef<uint8_t> Data,
+ llvm::support::endianness Endian)
+ : Stream(Data, Endian) {}
Error BinaryStreamWriter::writeBytes(ArrayRef<uint8_t> Buffer) {
if (auto EC = Stream.writeBytes(Offset, Buffer))
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 83376284548f..a12ba4fbfda8 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -39,6 +39,7 @@ add_llvm_library(LLVMSupport
Allocator.cpp
BinaryStreamError.cpp
BinaryStreamReader.cpp
+ BinaryStreamRef.cpp
BinaryStreamWriter.cpp
BlockFrequency.cpp
BranchProbability.cpp
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
index 2ed71c7e4311..c01659604444 100644
--- a/lib/Support/FormattedStream.cpp
+++ b/lib/Support/FormattedStream.cpp
@@ -32,6 +32,7 @@ static void UpdatePosition(std::pair<unsigned, unsigned> &Position, const char *
switch (*Ptr) {
case '\n':
Line += 1;
+ LLVM_FALLTHROUGH;
case '\r':
Column = 0;
break;
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index eb8108908ac5..b0e3d6898cae 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -1472,6 +1472,24 @@ bool Triple::isLittleEndian() const {
}
}
+bool Triple::isCompatibleWith(const Triple &Other) const {
+ // If vendor is apple, ignore the version number.
+ if (getVendor() == Triple::Apple)
+ return getArch() == Other.getArch() && getSubArch() == Other.getSubArch() &&
+ getVendor() == Other.getVendor() && getOS() == Other.getOS();
+
+ return *this == Other;
+}
+
+std::string Triple::merge(const Triple &Other) const {
+ // If vendor is apple, pick the triple with the larger version number.
+ if (getVendor() == Triple::Apple)
+ if (Other.isOSVersionLT(*this))
+ return str();
+
+ return Other.str();
+}
+
StringRef Triple::getARMCPUForArch(StringRef MArch) const {
if (MArch.empty())
MArch = getArchName();
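
A hedged sketch of the new helpers that replace the IRMover-local triplesMatch/mergeTriples pair; the wrapper and its empty-string error convention are illustrative:

#include "llvm/ADT/Triple.h"
#include <string>

using namespace llvm;

static std::string mergeModuleTriples(StringRef A, StringRef B) {
  Triple TA(A), TB(B);
  // Apple triples compare equal modulo OS version; merge() then keeps the
  // triple with the larger version number.
  if (!TA.isCompatibleWith(TB))
    return std::string(); // a real caller would diagnose the mismatch here
  return TA.merge(TB);
}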
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 5ddf66654a67..da68f3165c5e 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -315,8 +315,14 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
// AArch64 Instruction Predicate Definitions.
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">;
-def ForCodeSize : Predicate<"Subtarget->getForCodeSize()">;
-def NotForCodeSize : Predicate<"!Subtarget->getForCodeSize()">;
+
+// We could compute these on a per-module basis but doing so requires accessing
+// the Function object through the <Target>Subtarget and objections were raised
+// to that (see post-commit review comments for r301750).
+let RecomputePerFunction = 1 in {
+ def ForCodeSize : Predicate<"MF->getFunction()->optForSize()">;
+ def NotForCodeSize : Predicate<"!MF->getFunction()->optForSize()">;
+}
include "AArch64InstrFormats.td"
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 1c81d34014fd..b369ee7e4ba2 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -164,12 +164,11 @@ struct AArch64GISelActualAccessor : public GISelAccessor {
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
- const TargetMachine &TM, bool LittleEndian,
- bool ForCodeSize)
+ const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ReserveX18(TT.isOSDarwin()),
IsLittle(LittleEndian), TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
- TLInfo(TM, *this), GISel(), ForCodeSize(ForCodeSize) {
+ TLInfo(TM, *this), GISel() {
#ifndef LLVM_BUILD_GLOBAL_ISEL
GISelAccessor *AArch64GISel = new GISelAccessor();
#else
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index df54bf3f48e1..7933e58c49ee 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -128,8 +128,6 @@ protected:
/// an optional library.
std::unique_ptr<GISelAccessor> GISel;
- bool ForCodeSize;
-
private:
/// initializeSubtargetDependencies - Initializes using CPUString and the
/// passed in feature string so that we can use initializer lists for
@@ -145,7 +143,7 @@ public:
/// of the specified triple.
AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM,
- bool LittleEndian, bool ForCodeSize);
+ bool LittleEndian);
/// This object will take ownership of \p GISelAccessor.
void setGISelAccessor(GISelAccessor &GISel) {
@@ -274,8 +272,6 @@ public:
}
}
- bool getForCodeSize() const { return ForCodeSize; }
-
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 5a90fd1eb1ba..132f192f2a9a 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -214,7 +214,6 @@ const AArch64Subtarget *
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- bool ForCodeSize = F.optForSize();
std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
? CPUAttr.getValueAsString().str()
@@ -222,17 +221,15 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
std::string FS = !FSAttr.hasAttribute(Attribute::None)
? FSAttr.getValueAsString().str()
: TargetFS;
- std::string ForCodeSizeStr =
- std::string(ForCodeSize ? "+" : "-") + "forcodesize";
- auto &I = SubtargetMap[CPU + FS + ForCodeSizeStr];
+ auto &I = SubtargetMap[CPU + FS];
if (!I) {
// This needs to be done before we create a new subtarget since any
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
- isLittle, ForCodeSize);
+ isLittle);
}
return I.get();
}
@@ -324,7 +321,7 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
void AArch64PassConfig::addIRPasses() {
// Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
// ourselves.
- addPass(createAtomicExpandPass(TM));
+ addPass(createAtomicExpandPass());
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
@@ -343,7 +340,7 @@ void AArch64PassConfig::addIRPasses() {
// Match interleaved memory accesses to ldN/stN intrinsics.
if (TM->getOptLevel() != CodeGenOpt::None)
- addPass(createInterleavedAccessPass(TM));
+ addPass(createInterleavedAccessPass());
if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 3f89702bed50..78ff3bbe3d1a 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -27,12 +27,12 @@ class PassRegistry;
class Module;
// R600 Passes
-FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
-FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600VectorRegMerger();
+FunctionPass *createR600ExpandSpecialInstrsPass();
FunctionPass *createR600EmitClauseMarkers();
-FunctionPass *createR600ClauseMergePass(TargetMachine &tm);
-FunctionPass *createR600Packetizer(TargetMachine &tm);
-FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
+FunctionPass *createR600ClauseMergePass();
+FunctionPass *createR600Packetizer();
+FunctionPass *createR600ControlFlowFinalizer();
FunctionPass *createAMDGPUCFGStructurizerPass();
// SI Passes
@@ -42,24 +42,24 @@ FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSIShrinkInstructionsPass();
-FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
+FunctionPass *createSILoadStoreOptimizerPass();
FunctionPass *createSIWholeQuadModePass();
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSIDebuggerInsertNopsPass();
FunctionPass *createSIInsertWaitsPass();
FunctionPass *createSIInsertWaitcntsPass();
-FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
+FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
extern char &AMDGPUMachineCFGStructurizerID;
-ModulePass *createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM = nullptr);
+ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
-ModulePass *createAMDGPULowerIntrinsicsPass(const TargetMachine *TM = nullptr);
+ModulePass *createAMDGPULowerIntrinsicsPass();
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
extern char &AMDGPULowerIntrinsicsID;
@@ -97,7 +97,7 @@ void initializeSIOptimizeExecMaskingPass(PassRegistry &);
extern char &SIOptimizeExecMaskingID;
// Passes common to R600 and SI
-FunctionPass *createAMDGPUPromoteAlloca(const TargetMachine *TM = nullptr);
+FunctionPass *createAMDGPUPromoteAlloca();
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
extern char &AMDGPUPromoteAllocaID;
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 3d8db7cd8af5..7235d8fae332 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -27,7 +28,6 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
- const TargetMachine *TM;
AMDGPUAS AS;
static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);
@@ -37,8 +37,7 @@ private:
public:
static char ID;
- AMDGPUAnnotateKernelFeatures(const TargetMachine *TM_ = nullptr) :
- ModulePass(ID), TM(TM_) {}
+ AMDGPUAnnotateKernelFeatures() : ModulePass(ID) {}
bool runOnModule(Module &M) override;
StringRef getPassName() const override {
return "AMDGPU Annotate Kernel Features";
@@ -221,8 +220,10 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
if (F.hasFnAttribute("amdgpu-queue-ptr"))
continue;
- bool HasApertureRegs =
- TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ bool HasApertureRegs = TPC && TPC->getTM<TargetMachine>()
+ .getSubtarget<AMDGPUSubtarget>(F)
+ .hasApertureRegs();
if (!HasApertureRegs && hasAddrSpaceCast(F, AS))
F.addFnAttr("amdgpu-queue-ptr");
}
@@ -231,6 +232,6 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
return Changed;
}
-ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM) {
- return new AMDGPUAnnotateKernelFeatures(TM);
+ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
+ return new AMDGPUAnnotateKernelFeatures();
}
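
The same retrieval idiom recurs in AMDGPUCodeGenPrepare, AMDGPULowerIntrinsics, and AMDGPUPromoteAlloca below; a minimal sketch of the shared pattern (the helper name is ours, not the patch's):

    #include "llvm/CodeGen/TargetPassConfig.h"
    #include "llvm/Pass.h"
    #include "llvm/Target/TargetMachine.h"

    // Recover the TargetMachine from the running pipeline instead of carrying
    // it in the pass constructor; returns null when the pass runs outside a
    // codegen pipeline (e.g. under opt), so callers can bail out safely.
    static const llvm::TargetMachine *getTMIfAvailable(llvm::Pass &P) {
      if (auto *TPC = P.getAnalysisIfAvailable<llvm::TargetPassConfig>())
        return &TPC->getTM<llvm::TargetMachine>();
      return nullptr;
    }
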
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 09bdf8ffcde7..251cb7a2c440 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -38,7 +38,8 @@ class AMDGPUCallLowering: public CallLowering {
unsigned VReg) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
- CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
+ static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
+ static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
};
} // End of namespace llvm;
#endif
diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td
index d308f718aae1..4bef7a89bfe3 100644
--- a/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -13,6 +13,8 @@
// Inversion of CCIfInReg
class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
+class CCIfExtend<CCAction A>
+ : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
// Calling convention for SI
def CC_SI : CallingConv<[
@@ -52,7 +54,7 @@ def CC_SI : CallingConv<[
]>>>
]>;
-def RetCC_SI : CallingConv<[
+def RetCC_SI_Shader : CallingConv<[
CCIfType<[i32] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
@@ -99,6 +101,52 @@ def CC_AMDGPU_Kernel : CallingConv<[
CCCustom<"allocateKernArg">
]>;
+def CSR_AMDGPU_VGPRs_24_255 : CalleeSavedRegs<
+ (sequence "VGPR%u", 24, 255)
+>;
+
+def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs<
+ (sequence "VGPR%u", 32, 255)
+>;
+
+def CSR_AMDGPU_SGPRs_32_103 : CalleeSavedRegs<
+ (sequence "SGPR%u", 32, 103)
+>;
+
+def CSR_AMDGPU_HighRegs : CalleeSavedRegs<
+ (add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_103)
+>;
+
+// Calling convention for leaf functions
+def CC_AMDGPU_Func : CallingConv<[
+ CCIfByVal<CCPassByVal<4, 4>>,
+ CCIfType<[i1], CCPromoteToType<i32>>,
+ CCIfType<[i1, i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
+ CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
+ CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>,
+ CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
+ CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
+ CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
+ CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,
+ CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>
+]>;
+
+// Return value calling convention for leaf functions
+def RetCC_AMDGPU_Func : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i32>>,
+ CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
+ CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
+ CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>
+]>;
+
def CC_AMDGPU : CallingConv<[
CCIf<"static_cast<const AMDGPUSubtarget&>"
"(State.getMachineFunction().getSubtarget()).getGeneration() >="
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index e19314fe0a6c..d923cb117c12 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -48,7 +49,6 @@ namespace {
class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
- const GCNTargetMachine *TM;
const SISubtarget *ST = nullptr;
DivergenceAnalysis *DA = nullptr;
Module *Mod = nullptr;
@@ -127,8 +127,7 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
public:
static char ID;
- AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
- FunctionPass(ID), TM(static_cast<const GCNTargetMachine *>(TM)) {}
+ AMDGPUCodeGenPrepare() : FunctionPass(ID) {}
bool visitFDiv(BinaryOperator &I);
@@ -487,10 +486,15 @@ bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
}
bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
- if (!TM || skipFunction(F))
+ if (skipFunction(F))
return false;
- ST = &TM->getSubtarget<SISubtarget>(F);
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ const TargetMachine &TM = TPC->getTM<TargetMachine>();
+ ST = &TM.getSubtarget<SISubtarget>(F);
DA = &getAnalysis<DivergenceAnalysis>();
HasUnsafeFPMath = hasUnsafeFPMath(F);
@@ -507,14 +511,14 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
return MadeChange;
}
-INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
"AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
-INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE,
- "AMDGPU IR optimizations", false, false)
+INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
+ false, false)
char AMDGPUCodeGenPrepare::ID = 0;
-FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM) {
- return new AMDGPUCodeGenPrepare(TM);
+FunctionPass *llvm::createAMDGPUCodeGenPreparePass() {
+ return new AMDGPUCodeGenPrepare();
}
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index c3ac796a0a44..19fce064783d 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -82,7 +82,7 @@ public:
void PostprocessISelDAG() override;
private:
- SDValue foldFrameIndex(SDValue N) const;
+ std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N) const;
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
@@ -116,9 +116,11 @@ private:
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
- bool SelectMUBUFScratchOffen(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
+ bool SelectMUBUFScratchOffen(SDNode *Root,
+ SDValue Addr, SDValue &RSrc, SDValue &VAddr,
SDValue &SOffset, SDValue &ImmOffset) const;
- bool SelectMUBUFScratchOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ bool SelectMUBUFScratchOffset(SDNode *Root,
+ SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
@@ -1074,13 +1076,33 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
-SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
- if (auto FI = dyn_cast<FrameIndexSDNode>(N))
- return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
- return N;
+static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
+ auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
+ return PSV && PSV->isStack();
+}
+
+std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
+ const MachineFunction &MF = CurDAG->getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
+ FI->getValueType(0));
+
+ // If we can resolve this to a frame index access, this is relative to the
+ // frame pointer SGPR.
+ return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
+ MVT::i32));
+ }
+
+ // If we don't know this private access is a local stack object, it needs to
+ // be relative to the entry point's scratch wave offset register.
+ return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
+ MVT::i32));
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc,
+bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root,
+ SDValue Addr, SDValue &Rsrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &ImmOffset) const {
@@ -1089,7 +1111,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc,
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
- SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
unsigned Imm = CAddr->getZExtValue();
@@ -1100,6 +1121,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc,
MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, HighBits);
VAddr = SDValue(MovHighBits, 0);
+
+ // In a call sequence, stores to the argument stack area are relative to the
+ // stack pointer.
+ const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
+ unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
+ Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
+
+ SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
return true;
}
@@ -1113,19 +1142,20 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc,
// Offsets in vaddr must be positive.
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
if (isLegalMUBUFImmOffset(C1)) {
- VAddr = foldFrameIndex(N0);
+ std::tie(VAddr, SOffset) = foldFrameIndex(N0);
ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
}
// (node)
- VAddr = foldFrameIndex(Addr);
+ std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root,
+ SDValue Addr,
SDValue &SRsrc,
SDValue &SOffset,
SDValue &Offset) const {
@@ -1138,7 +1168,15 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDValue Addr,
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
- SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
+
+ const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
+ unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
+ Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
+
+ // FIXME: Get from MachinePointerInfo? We should only be using the frame
+ // offset if we know this is in a call sequence.
+ SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
+
Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
return true;
}
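
Both scratch selectors now pick the soffset source the same way; reduced to its essence (a sketch, with the register-number parameters standing in for the SIMachineFunctionInfo accessors):

    // Call-sequence argument accesses are addressed off the stack pointer;
    // everything else stays relative to the entry wave's scratch offset.
    static unsigned pickSOffsetReg(bool StackPtrRelative, unsigned StackPtrReg,
                                   unsigned ScratchWaveOffsetReg) {
      return StackPtrRelative ? StackPtrReg : ScratchWaveOffsetReg;
    }
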
@@ -1700,12 +1738,46 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
return true;
}
+static SDValue stripBitcast(SDValue Val) {
+ return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
+}
+
+// Figure out if this is really an extract of the high 16 bits of a dword.
+static bool isExtractHiElt(SDValue In, SDValue &Out) {
+ In = stripBitcast(In);
+ if (In.getOpcode() != ISD::TRUNCATE)
+ return false;
+
+ SDValue Srl = In.getOperand(0);
+ if (Srl.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
+ if (ShiftAmt->getZExtValue() == 16) {
+ Out = stripBitcast(Srl.getOperand(0));
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+// Look through operations that obscure the fact that only the low 16 bits of
+// the same register are being used.
+static SDValue stripExtractLoElt(SDValue In) {
+ if (In.getOpcode() == ISD::TRUNCATE) {
+ SDValue Src = In.getOperand(0);
+ if (Src.getValueType().getSizeInBits() == 32)
+ return stripBitcast(Src);
+ }
+
+ return In;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;
Src = In;
- // FIXME: Look for on separate components
if (Src.getOpcode() == ISD::FNEG) {
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
Src = Src.getOperand(0);
@@ -1714,19 +1786,28 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
if (Src.getOpcode() == ISD::BUILD_VECTOR) {
unsigned VecMods = Mods;
- SDValue Lo = Src.getOperand(0);
- SDValue Hi = Src.getOperand(1);
+ SDValue Lo = stripBitcast(Src.getOperand(0));
+ SDValue Hi = stripBitcast(Src.getOperand(1));
if (Lo.getOpcode() == ISD::FNEG) {
- Lo = Lo.getOperand(0);
+ Lo = stripBitcast(Lo.getOperand(0));
Mods ^= SISrcMods::NEG;
}
if (Hi.getOpcode() == ISD::FNEG) {
- Hi = Hi.getOperand(0);
+ Hi = stripBitcast(Hi.getOperand(0));
Mods ^= SISrcMods::NEG_HI;
}
+ if (isExtractHiElt(Lo, Lo))
+ Mods |= SISrcMods::OP_SEL_0;
+
+ if (isExtractHiElt(Hi, Hi))
+ Mods |= SISrcMods::OP_SEL_1;
+
+ Lo = stripExtractLoElt(Lo);
+ Hi = stripExtractLoElt(Hi);
+
if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
// Really a scalar input. Just select from the low half of the register to
// avoid packing.
@@ -1740,9 +1821,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
}
// Packed instructions do not have abs modifiers.
-
- // FIXME: Handle abs/neg of individual components.
- // FIXME: Handle swizzling with op_sel
Mods |= SISrcMods::OP_SEL_1;
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
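
The new op_sel matching rests on a small bit-level fact, modeled here in plain C++ (a sketch, not the DAG code): the high half of a packed dword is trunc(srl x, 16), and the low half survives a 32-bit truncate unchanged.

    #include <cassert>
    #include <cstdint>

    // Model of what isExtractHiElt recognizes: trunc(srl x, 16) is exactly
    // the high 16-bit element of a packed 32-bit value.
    static uint16_t extractHiElt(uint32_t X) { return uint16_t(X >> 16); }

    int main() {
      uint32_t Packed = 0xABCD1234;
      assert(extractHiElt(Packed) == 0xABCD);
      assert(uint16_t(Packed) == 0x1234); // low element: no shift needed
      return 0;
    }
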
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index f80652b87373..5ec46a8294c0 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -76,6 +76,45 @@ static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
+// Allocate up to VGPR31.
+//
+// TODO: Since there are no VGPR alignment requirements, would it be better to
+// split into individual scalar registers?
+static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ switch (LocVT.SimpleTy) {
+ case MVT::i64:
+ case MVT::f64:
+ case MVT::v2i32:
+ case MVT::v2f32: {
+ return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
+ &AMDGPU::VReg_64RegClass, 31);
+ }
+ case MVT::v4i32:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v2f64: {
+ return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
+ &AMDGPU::VReg_128RegClass, 29);
+ }
+ case MVT::v8i32:
+ case MVT::v8f32: {
+ return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
+ &AMDGPU::VReg_256RegClass, 25);
+
+ }
+ case MVT::v16i32:
+ case MVT::v16f32: {
+ return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
+ &AMDGPU::VReg_512RegClass, 17);
+
+ }
+ default:
+ return false;
+ }
+}
+
#include "AMDGPUGenCallingConv.inc"
// Find a larger type to do a load / store of a vector with.
@@ -773,8 +812,43 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
//===---------------------------------------------------------------------===//
CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
- bool IsVarArg) const {
- return CC_AMDGPU;
+ bool IsVarArg) {
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return CC_AMDGPU_Kernel;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_HS:
+ return CC_AMDGPU;
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return CC_AMDGPU_Func;
+ default:
+ report_fatal_error("Unsupported calling convention.");
+ }
+}
+
+CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
+ bool IsVarArg) {
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return CC_AMDGPU_Kernel;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_HS:
+ return RetCC_SI_Shader;
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return RetCC_AMDGPU_Func;
+ default:
+ report_fatal_error("Unsupported calling convention.");
+ }
}
/// The SelectionDAGBuilder will automatically promote function arguments
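
Both the SelectionDAG and GlobalISel paths now route through these static helpers, so supporting a new convention means touching exactly one switch; a sketch of a caller (the include path and wrapper name are illustrative):

    #include "AMDGPUCallLowering.h" // illustrative include path
    #include "llvm/IR/CallingConv.h"

    // Pick the argument- or return-value assignment function for a call.
    static llvm::CCAssignFn *assignFnFor(llvm::CallingConv::ID CC, bool IsRet) {
      return IsRet ? llvm::AMDGPUCallLowering::CCAssignFnForReturn(CC, false)
                   : llvm::AMDGPUCallLowering::CCAssignFnForCall(CC, false);
    }
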
@@ -874,18 +948,15 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(CCState &State,
}
}
-void AMDGPUTargetLowering::AnalyzeReturn(CCState &State,
- const SmallVectorImpl<ISD::OutputArg> &Outs) const {
-
- State.AnalyzeReturn(Outs, RetCC_SI);
-}
-
-SDValue
-AMDGPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SDLoc &DL, SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::LowerReturn(
+ SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SDLoc &DL, SelectionDAG &DAG) const {
+ // FIXME: Fails for r600 tests
+ //assert(!isVarArg && Outs.empty() && OutVals.empty() &&
+ // "wave terminate should not have return values");
return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain);
}
@@ -896,20 +967,12 @@ AMDGPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) {
- switch (CC) {
- case CallingConv::C:
- case CallingConv::AMDGPU_KERNEL:
- case CallingConv::SPIR_KERNEL:
- return CC_AMDGPU_Kernel;
- case CallingConv::AMDGPU_VS:
- case CallingConv::AMDGPU_HS:
- case CallingConv::AMDGPU_GS:
- case CallingConv::AMDGPU_PS:
- case CallingConv::AMDGPU_CS:
- return CC_AMDGPU;
- default:
- report_fatal_error("Unsupported calling convention.");
- }
+ return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg);
+}
+
+CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
+ bool IsVarArg) {
+ return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg);
}
SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
@@ -2532,27 +2595,49 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- if (N->getValueType(0) != MVT::i64)
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64)
return SDValue();
- // i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
-
- // On some subtargets, 64-bit shift is a quarter rate instruction. In the
- // common case, splitting this into a move and a 32-bit shift is faster and
- // the same code size.
- const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!RHS)
return SDValue();
- unsigned RHSVal = RHS->getZExtValue();
- if (RHSVal < 32)
- return SDValue();
-
SDValue LHS = N->getOperand(0);
+ unsigned RHSVal = RHS->getZExtValue();
+ if (!RHSVal)
+ return LHS;
SDLoc SL(N);
SelectionDAG &DAG = DCI.DAG;
+ switch (LHS->getOpcode()) {
+ default:
+ break;
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ANY_EXTEND: {
+ // shl (ext x) => zext (shl x), if the shift does not overflow the narrow type
+ KnownBits Known;
+ SDValue X = LHS->getOperand(0);
+ DAG.computeKnownBits(X, Known);
+ unsigned LZ = Known.countMinLeadingZeros();
+ if (LZ < RHSVal)
+ break;
+ EVT XVT = X.getValueType();
+ SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
+ return DAG.getZExtOrTrunc(Shl, SL, VT);
+ }
+ }
+
+ // i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
+
+ // On some subtargets, 64-bit shift is a quarter rate instruction. In the
+ // common case, splitting this into a move and a 32-bit shift is faster and
+ // the same code size.
+ if (RHSVal < 32)
+ return SDValue();
+
SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32);
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
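
A worked instance of the new fold for the zero-extend case (a self-checking sketch): when the known leading zeros of the narrow value cover the shift amount, shifting before the extension loses nothing.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x00001234; // at least 8 known leading zero bits
      unsigned C = 8;
      uint64_t Wide   = uint64_t(X) << C;           // shl (zext x), C
      uint64_t Narrow = uint64_t(uint32_t(X << C)); // zext (shl x, C)
      assert(Wide == Narrow); // the 64-bit shift can be done in 32 bits
      return 0;
    }
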
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 4c588a7bafd0..fb2f15022d25 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -115,9 +115,6 @@ protected:
SmallVectorImpl<SDValue> &Results) const;
void analyzeFormalArgumentsCompute(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
- void AnalyzeReturn(CCState &State,
- const SmallVectorImpl<ISD::OutputArg> &Outs) const;
-
public:
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
@@ -164,6 +161,8 @@ public:
bool isCheapToSpeculateCtlz() const override;
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
+ static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
+
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 353cc5742791..e286558ce60d 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -380,6 +380,6 @@ def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
diff --git a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index dcb6670621ee..846e7dff5f8c 100644
--- a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -9,6 +9,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -25,15 +26,13 @@ const unsigned MaxStaticSize = 1024;
class AMDGPULowerIntrinsics : public ModulePass {
private:
- const TargetMachine *TM;
-
bool makeLIDRangeMetadata(Function &F) const;
public:
static char ID;
- AMDGPULowerIntrinsics(const TargetMachine *TM = nullptr)
- : ModulePass(ID), TM(TM) { }
+ AMDGPULowerIntrinsics() : ModulePass(ID) {}
+
bool runOnModule(Module &M) override;
StringRef getPassName() const override {
return "AMDGPU Lower Intrinsics";
@@ -46,8 +45,8 @@ char AMDGPULowerIntrinsics::ID = 0;
char &llvm::AMDGPULowerIntrinsicsID = AMDGPULowerIntrinsics::ID;
-INITIALIZE_TM_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE,
- "Lower intrinsics", false, false)
+INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false,
+ false)
// TODO: Should refine based on estimated number of accesses (e.g. does it
// require splitting based on alignment)
@@ -104,11 +103,13 @@ static bool expandMemIntrinsicUses(Function &F) {
}
bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
- if (!TM)
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
return false;
+ const TargetMachine &TM = TPC->getTM<TargetMachine>();
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(F);
bool Changed = false;
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
for (auto *U : F.users()) {
auto *CI = dyn_cast<CallInst>(U);
@@ -155,6 +156,6 @@ bool AMDGPULowerIntrinsics::runOnModule(Module &M) {
return Changed;
}
-ModulePass *llvm::createAMDGPULowerIntrinsicsPass(const TargetMachine *TM) {
- return new AMDGPULowerIntrinsics(TM);
+ModulePass *llvm::createAMDGPULowerIntrinsicsPass() {
+ return new AMDGPULowerIntrinsics();
}
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index da247fea7de6..f1ef6281c90f 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -126,9 +126,15 @@ bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
}
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ unsigned Opcode = MI->getOpcode();
- int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
+ // FIXME: Should be able to handle this with emitPseudoExpansionLowering. We
+ // need to select it to the subtarget-specific version, and there's no way to
+ // do that with a single pseudo source operation.
+ if (Opcode == AMDGPU::S_SETPC_B64_return)
+ Opcode = AMDGPU::S_SETPC_B64;
+ int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(Opcode);
if (MCOpcode == -1) {
LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext();
C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index fe7283ccf7d9..9fb7f5f88927 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -12,21 +12,6 @@
using namespace llvm;
-static bool isEntryFunctionCC(CallingConv::ID CC) {
- switch (CC) {
- case CallingConv::AMDGPU_KERNEL:
- case CallingConv::SPIR_KERNEL:
- case CallingConv::AMDGPU_VS:
- case CallingConv::AMDGPU_HS:
- case CallingConv::AMDGPU_GS:
- case CallingConv::AMDGPU_PS:
- case CallingConv::AMDGPU_CS:
- return true;
- default:
- return false;
- }
-}
-
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
LocalMemoryObjects(),
@@ -34,7 +19,7 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MaxKernArgAlign(0),
LDSSize(0),
ABIArgOffset(0),
- IsEntryFunction(isEntryFunctionCC(MF.getFunction()->getCallingConv())),
+ IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction()->getCallingConv())),
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
// except reserved size is not correctly aligned.
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e40f39557747..85184b363905 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -99,8 +100,7 @@ private:
public:
static char ID;
- AMDGPUPromoteAlloca(const TargetMachine *TM_ = nullptr) :
- FunctionPass(ID), TM(TM_) {}
+ AMDGPUPromoteAlloca() : FunctionPass(ID) {}
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
@@ -119,30 +119,31 @@ public:
char AMDGPUPromoteAlloca::ID = 0;
-INITIALIZE_TM_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
- "AMDGPU promote alloca to vector or LDS", false, false)
+INITIALIZE_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
+ "AMDGPU promote alloca to vector or LDS", false, false)
char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;
bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
- if (!TM)
- return false;
-
Mod = &M;
DL = &Mod->getDataLayout();
- const Triple &TT = TM->getTargetTriple();
-
- IsAMDGCN = TT.getArch() == Triple::amdgcn;
- IsAMDHSA = TT.getOS() == Triple::AMDHSA;
-
return false;
}
bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
- if (!TM || skipFunction(F))
+ if (skipFunction(F))
return false;
+ if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
+ TM = &TPC->getTM<TargetMachine>();
+ else
+ return false;
+
+ const Triple &TT = TM->getTargetTriple();
+ IsAMDGCN = TT.getArch() == Triple::amdgcn;
+ IsAMDHSA = TT.getOS() == Triple::AMDHSA;
+
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
if (!ST.isPromoteAllocaEnabled())
return false;
@@ -874,6 +875,6 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
}
}
-FunctionPass *llvm::createAMDGPUPromoteAlloca(const TargetMachine *TM) {
- return new AMDGPUPromoteAlloca(TM);
+FunctionPass *llvm::createAMDGPUPromoteAlloca() {
+ return new AMDGPUPromoteAlloca();
}
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index 941f2d8a468a..b2867fcc49f9 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -14,6 +14,7 @@
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "SIRegisterInfo.h"
using namespace llvm;
@@ -24,18 +25,6 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
// they are not supported at this time.
//===----------------------------------------------------------------------===//
-// Dummy to not crash RegisterClassInfo.
-static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister;
-
-const MCPhysReg *AMDGPURegisterInfo::getCalleeSavedRegs(
- const MachineFunction *) const {
- return &CalleeSavedReg;
-}
-
-unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return AMDGPU::NoRegister;
-}
-
unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
static const unsigned SubRegs[] = {
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
@@ -50,3 +39,35 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"
+
+
+// Forced to live here by the AMDGPUGenRegisterInfo.inc include above.
+const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
+ const MachineFunction *MF) const {
+ CallingConv::ID CC = MF->getFunction()->getCallingConv();
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return CSR_AMDGPU_HighRegs_SaveList;
+ default: {
+ // Dummy to not crash RegisterClassInfo.
+ static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
+ return &NoCalleeSavedReg;
+ }
+ }
+}
+
+const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return CSR_AMDGPU_HighRegs_RegMask;
+ default:
+ return nullptr;
+ }
+}
+
+unsigned SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return AMDGPU::NoRegister;
+}
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index 22b1663821d9..d8604d2590f1 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -30,9 +30,6 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
unsigned getSubRegFromChannel(unsigned Channel) const;
-
- const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const override;
- unsigned getFrameRegister(const MachineFunction &MF) const override;
};
} // End namespace llvm
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 386a88b0520f..a9d3a31a7240 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -570,7 +570,7 @@ void AMDGPUPassConfig::addIRPasses() {
disablePass(&FuncletLayoutID);
disablePass(&PatchableFunctionID);
- addPass(createAMDGPULowerIntrinsicsPass(&TM));
+ addPass(createAMDGPULowerIntrinsicsPass());
// Function calls are not supported, so make sure we inline everything.
addPass(createAMDGPUAlwaysInlinePass());
@@ -585,8 +585,7 @@ void AMDGPUPassConfig::addIRPasses() {
if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
// TODO: May want to move later or split into an early and late one.
- addPass(createAMDGPUCodeGenPreparePass(
- static_cast<const GCNTargetMachine *>(&TM)));
+ addPass(createAMDGPUCodeGenPreparePass());
}
// Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
@@ -594,7 +593,7 @@ void AMDGPUPassConfig::addIRPasses() {
if (TM.getOptLevel() > CodeGenOpt::None) {
addPass(createInferAddressSpacesPass());
- addPass(createAMDGPUPromoteAlloca(&TM));
+ addPass(createAMDGPUPromoteAlloca());
if (EnableSROA)
addPass(createSROAPass());
@@ -664,22 +663,22 @@ bool R600PassConfig::addPreISel() {
}
void R600PassConfig::addPreRegAlloc() {
- addPass(createR600VectorRegMerger(*TM));
+ addPass(createR600VectorRegMerger());
}
void R600PassConfig::addPreSched2() {
addPass(createR600EmitClauseMarkers(), false);
if (EnableR600IfConvert)
addPass(&IfConverterID, false);
- addPass(createR600ClauseMergePass(*TM), false);
+ addPass(createR600ClauseMergePass(), false);
}
void R600PassConfig::addPreEmitPass() {
addPass(createAMDGPUCFGStructurizerPass(), false);
- addPass(createR600ExpandSpecialInstrsPass(*TM), false);
+ addPass(createR600ExpandSpecialInstrsPass(), false);
addPass(&FinalizeMachineBundlesID, false);
- addPass(createR600Packetizer(*TM), false);
- addPass(createR600ControlFlowFinalizer(*TM), false);
+ addPass(createR600Packetizer(), false);
+ addPass(createR600ControlFlowFinalizer(), false);
}
TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
@@ -703,8 +702,7 @@ bool GCNPassConfig::addPreISel() {
// FIXME: We need to run a pass to propagate the attributes when calls are
// supported.
- const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
- addPass(createAMDGPUAnnotateKernelFeaturesPass(&TM));
+ addPass(createAMDGPUAnnotateKernelFeaturesPass());
// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
// regions formed by them.
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 70c848f3c7bd..b52ea2b3a2c6 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -2796,6 +2796,7 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptionalIdx;
+ unsigned OperandIdx[4];
unsigned EnMask = 0;
int SrcIdx = 0;
@@ -2804,15 +2805,18 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
// Add the register arguments
if (Op.isReg()) {
- EnMask |= (1 << SrcIdx);
+ assert(SrcIdx < 4);
+ OperandIdx[SrcIdx] = Inst.size();
Op.addRegOperands(Inst, 1);
++SrcIdx;
continue;
}
if (Op.isOff()) {
- ++SrcIdx;
+ assert(SrcIdx < 4);
+ OperandIdx[SrcIdx] = Inst.size();
Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
+ ++SrcIdx;
continue;
}
@@ -2828,6 +2832,22 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
OptionalIdx[Op.getImmTy()] = i;
}
+ assert(SrcIdx == 4);
+
+ bool Compr = false;
+ if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
+ Compr = true;
+ Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
+ Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
+ Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
+ }
+
+ for (auto i = 0; i < SrcIdx; ++i) {
+ if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
+ EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
+ }
+ }
+
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
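
The enable-mask computation above reduces to two bits per source when the export is compressed, one bit otherwise; a standalone model (a sketch; Present[i] stands for "operand i is a real register, not off"):

    // Mirror of the parser's EnMask loop.
    static unsigned computeEnMask(const bool Present[4], bool Compr) {
      unsigned EnMask = 0;
      for (int i = 0; i < 4; ++i)
        if (Present[i])
          EnMask |= Compr ? (0x3u << (i * 2)) : (0x1u << i);
      return EnMask;
    }
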
@@ -3642,6 +3662,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
{"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
{"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
+ {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
{"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
{"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
{"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td
index 89eddb9ce961..2aca65ac8430 100644
--- a/lib/Target/AMDGPU/BUFInstructions.td
+++ b/lib/Target/AMDGPU/BUFInstructions.td
@@ -11,8 +11,8 @@ def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
-def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen">;
-def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [], 20>;
+def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantRoot]>;
+def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantRoot], 20>;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 4fb03b62bba9..137b5cca96ce 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -126,6 +126,7 @@ DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
assert(MI.getOpcode() == 0);
assert(MI.getNumOperands() == 0);
MCInst TmpInst;
+ HasLiteral = false;
const auto SavedBytes = Bytes;
if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
MI = TmpInst;
@@ -343,10 +344,15 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
// For now all literal constants are supposed to be unsigned integer
// ToDo: deal with signed/unsigned 64-bit integer constants
// ToDo: deal with float/double constants
- if (Bytes.size() < 4)
- return errOperand(0, "cannot read literal, inst bytes left " +
- Twine(Bytes.size()));
- return MCOperand::createImm(eatBytes<uint32_t>(Bytes));
+ if (!HasLiteral) {
+ if (Bytes.size() < 4) {
+ return errOperand(0, "cannot read literal, inst bytes left " +
+ Twine(Bytes.size()));
+ }
+ HasLiteral = true;
+ Literal = eatBytes<uint32_t>(Bytes);
+ }
+ return MCOperand::createImm(Literal);
}
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index d50665187e10..620bae0a6d1a 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -39,6 +39,8 @@ class Twine;
class AMDGPUDisassembler : public MCDisassembler {
private:
mutable ArrayRef<uint8_t> Bytes;
+ mutable uint32_t Literal;
+ mutable bool HasLiteral;
public:
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp
index 8066428fe44a..18374dca3f84 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "GCNRegPressure.h"
+#include "llvm/CodeGen/RegisterPressure.h"
using namespace llvm;
@@ -63,15 +64,6 @@ static bool isEqual(const GCNRPTracker::LiveRegSet &S1,
return true;
}
-static GCNRPTracker::LiveRegSet
-stripEmpty(const GCNRPTracker::LiveRegSet &LR) {
- GCNRPTracker::LiveRegSet Res;
- for (const auto &P : LR) {
- if (P.second.any())
- Res.insert(P);
- }
- return Res;
-}
#endif
///////////////////////////////////////////////////////////////////////////////
@@ -185,6 +177,64 @@ void GCNRegPressure::print(raw_ostream &OS, const SISubtarget *ST) const {
}
#endif
+
+static LaneBitmask getDefRegMask(const MachineOperand &MO,
+ const MachineRegisterInfo &MRI) {
+ assert(MO.isDef() && MO.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()));
+
+ // We don't rely on the read-undef flag because, in the case of tentative
+ // schedule tracking, it isn't set correctly yet. This still works correctly,
+ // however, since the use mask has already been tracked using LIS.
+ return MO.getSubReg() == 0 ?
+ MRI.getMaxLaneMaskForVReg(MO.getReg()) :
+ MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg());
+}
+
+static LaneBitmask getUsedRegMask(const MachineOperand &MO,
+ const MachineRegisterInfo &MRI,
+ const LiveIntervals &LIS) {
+ assert(MO.isUse() && MO.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()));
+
+ if (auto SubReg = MO.getSubReg())
+ return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
+
+ auto MaxMask = MRI.getMaxLaneMaskForVReg(MO.getReg());
+ if (MaxMask.getAsInteger() == 1) // cannot have subregs
+ return MaxMask;
+
+ // For a tentative schedule, LIS isn't updated yet, but the live mask should
+ // remain the same for any schedule. Subreg defs can be reordered, but they
+ // all must dominate uses anyway.
+ auto SI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
+ return getLiveLaneMask(MO.getReg(), SI, LIS, MRI);
+}
+
+SmallVector<RegisterMaskPair, 8> collectVirtualRegUses(const MachineInstr &MI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI) {
+ SmallVector<RegisterMaskPair, 8> Res;
+ for (const auto &MO : MI.operands()) {
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ if (!MO.isUse() || !MO.readsReg())
+ continue;
+
+ auto const UsedMask = getUsedRegMask(MO, MRI, LIS);
+
+ auto Reg = MO.getReg();
+ auto I = std::find_if(Res.begin(), Res.end(), [Reg](const RegisterMaskPair &RM) {
+ return RM.RegUnit == Reg;
+ });
+ if (I != Res.end())
+ I->LaneMask |= UsedMask;
+ else
+ Res.push_back(RegisterMaskPair(Reg, UsedMask));
+ }
+ return Res;
+}
+
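
collectVirtualRegUses folds repeated uses of one register into a single entry by OR-ing lane masks; a minimal model of that accumulation (std::pair stands in for RegisterMaskPair):

    #include <cstdint>
    #include <utility>
    #include <vector>

    using MaskPair = std::pair<unsigned, uint64_t>; // (vreg, lane mask)

    // Merge a use into the result list, OR-ing masks for a repeated register.
    static void addUse(std::vector<MaskPair> &Res, unsigned Reg, uint64_t Mask) {
      for (auto &P : Res)
        if (P.first == Reg) { P.second |= Mask; return; }
      Res.emplace_back(Reg, Mask);
    }
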
///////////////////////////////////////////////////////////////////////////////
// GCNRPTracker
@@ -222,36 +272,6 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
return LiveRegs;
}
-LaneBitmask GCNRPTracker::getDefRegMask(const MachineOperand &MO) const {
- assert(MO.isDef() && MO.isReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()));
-
- // We don't rely on read-undef flag because in case of tentative schedule
- // tracking it isn't set correctly yet. This works correctly however since
- // use mask has been tracked before using LIS.
- return MO.getSubReg() == 0 ?
- MRI->getMaxLaneMaskForVReg(MO.getReg()) :
- MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg());
-}
-
-LaneBitmask GCNRPTracker::getUsedRegMask(const MachineOperand &MO) const {
- assert(MO.isUse() && MO.isReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()));
-
- if (auto SubReg = MO.getSubReg())
- return MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
-
- auto MaxMask = MRI->getMaxLaneMaskForVReg(MO.getReg());
- if (MaxMask.getAsInteger() == 1) // cannot have subregs
- return MaxMask;
-
- // For a tentative schedule LIS isn't updated yet but livemask should remain
- // the same on any schedule. Subreg defs can be reordered but they all must
- // dominate uses anyway.
- auto SI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
- return getLiveLaneMask(MO.getReg(), SI, LIS, *MRI);
-}
-
void GCNUpwardRPTracker::reset(const MachineInstr &MI,
const LiveRegSet *LiveRegsCopy) {
MRI = &MI.getParent()->getParent()->getRegInfo();
@@ -272,34 +292,40 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (MI.isDebugValue())
return;
- // process all defs first to ensure early clobbers are handled correctly
- // iterating over operands() to catch implicit defs
- for (const auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef() ||
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
+ auto const RegUses = collectVirtualRegUses(MI, LIS, *MRI);
- auto Reg = MO.getReg();
- auto &LiveMask = LiveRegs[Reg];
- auto PrevMask = LiveMask;
- LiveMask &= ~getDefRegMask(MO);
- CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ // calc pressure at the MI (defs + uses)
+ auto AtMIPressure = CurPressure;
+ for (const auto &U : RegUses) {
+ auto LiveMask = LiveRegs[U.RegUnit];
+ AtMIPressure.inc(U.RegUnit, LiveMask, LiveMask | U.LaneMask, *MRI);
}
+ // update max pressure
+ MaxPressure = max(AtMIPressure, MaxPressure);
- // then all uses
- for (const auto &MO : MI.uses()) {
- if (!MO.isReg() || !MO.readsReg() ||
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ for (const auto &MO : MI.defs()) {
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()) ||
+ MO.isDead())
continue;
auto Reg = MO.getReg();
- auto &LiveMask = LiveRegs[Reg];
+ auto I = LiveRegs.find(Reg);
+ if (I == LiveRegs.end())
+ continue;
+ auto &LiveMask = I->second;
auto PrevMask = LiveMask;
- LiveMask |= getUsedRegMask(MO);
+ LiveMask &= ~getDefRegMask(MO, *MRI);
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ if (LiveMask.none())
+ LiveRegs.erase(I);
}
-
- MaxPressure = max(MaxPressure, CurPressure);
+ for (const auto &U : RegUses) {
+ auto &LiveMask = LiveRegs[U.RegUnit];
+ auto PrevMask = LiveMask;
+ LiveMask |= U.LaneMask;
+ CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
+ }
+ assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}
bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
@@ -368,7 +394,7 @@ void GCNDownwardRPTracker::advanceToNext() {
continue;
auto &LiveMask = LiveRegs[Reg];
auto PrevMask = LiveMask;
- LiveMask |= getDefRegMask(MO);
+ LiveMask |= getDefRegMask(MO, *MRI);
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
}
@@ -430,7 +456,7 @@ static void reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
bool GCNUpwardRPTracker::isValid() const {
const auto &SI = LIS.getInstructionIndex(*LastTrackedMI).getBaseIndex();
const auto LISLR = llvm::getLiveRegs(SI, LIS, *MRI);
- const auto TrackedLR = stripEmpty(LiveRegs);
+ const auto &TrackedLR = LiveRegs;
if (!isEqual(LISLR, TrackedLR)) {
dbgs() << "\nGCNUpwardRPTracker error: Tracked and"
diff --git a/lib/Target/AMDGPU/GCNRegPressure.h b/lib/Target/AMDGPU/GCNRegPressure.h
index 9875ca6a6d16..5dfe44053e72 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/lib/Target/AMDGPU/GCNRegPressure.h
@@ -98,8 +98,6 @@ protected:
const MachineInstr *LastTrackedMI = nullptr;
mutable const MachineRegisterInfo *MRI = nullptr;
GCNRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {}
- LaneBitmask getDefRegMask(const MachineOperand &MO) const;
- LaneBitmask getUsedRegMask(const MachineOperand &MO) const;
public:
// live regs for the current state
const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
diff --git a/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/lib/Target/AMDGPU/R600ClauseMergePass.cpp
index d0aba38f786d..fbe45cb222d9 100644
--- a/lib/Target/AMDGPU/R600ClauseMergePass.cpp
+++ b/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -62,7 +62,7 @@ private:
const MachineInstr &LatrCFAlu) const;
public:
- R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
+ R600ClauseMergePass() : MachineFunctionPass(ID) { }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -208,6 +208,6 @@ StringRef R600ClauseMergePass::getPassName() const {
} // end anonymous namespace
-llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
- return new R600ClauseMergePass(TM);
+llvm::FunctionPass *llvm::createR600ClauseMergePass() {
+ return new R600ClauseMergePass();
}
diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 811b905588b4..09b328765604 100644
--- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -499,7 +499,7 @@ private:
}
public:
- R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID) {}
+ R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override {
ST = &MF.getSubtarget<R600Subtarget>();
@@ -706,6 +706,6 @@ char R600ControlFlowFinalizer::ID = 0;
} // end anonymous namespace
-FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
- return new R600ControlFlowFinalizer(TM);
+FunctionPass *llvm::createR600ControlFlowFinalizer() {
+ return new R600ControlFlowFinalizer();
}
diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index 3e46e6387614..5c30a0734f0d 100644
--- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -37,7 +37,7 @@ private:
unsigned Op);
public:
- R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
+ R600ExpandSpecialInstrsPass() : MachineFunctionPass(ID),
TII(nullptr) { }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -51,8 +51,8 @@ public:
char R600ExpandSpecialInstrsPass::ID = 0;
-FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
- return new R600ExpandSpecialInstrsPass(TM);
+FunctionPass *llvm::createR600ExpandSpecialInstrsPass() {
+ return new R600ExpandSpecialInstrsPass();
}
void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI,
diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index d90008a550ae..502dd3bce97e 100644
--- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -124,7 +124,7 @@ private:
public:
static char ID;
- R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID),
+ R600VectorRegMerger() : MachineFunctionPass(ID),
TII(nullptr) { }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -396,6 +396,6 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
return false;
}
-llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) {
- return new R600VectorRegMerger(tm);
+llvm::FunctionPass *llvm::createR600VectorRegMerger() {
+ return new R600VectorRegMerger();
}
diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp
index 5b6dd1ed128d..3e957126b497 100644
--- a/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -36,7 +36,7 @@ class R600Packetizer : public MachineFunctionPass {
public:
static char ID;
- R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
+ R600Packetizer() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -404,6 +404,6 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
} // end anonymous namespace
-llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) {
- return new R600Packetizer(tm);
+llvm::FunctionPass *llvm::createR600Packetizer() {
+ return new R600Packetizer();
}
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp
index dfdc602b80cd..7501facb0cba 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -56,6 +56,18 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+// Dummy so that RegisterClassInfo does not crash.
+static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister;
+
+const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs(
+ const MachineFunction *) const {
+ return &CalleeSavedReg;
+}
+
+unsigned R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return AMDGPU::NoRegister;
+}
+
unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
}
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.h b/lib/Target/AMDGPU/R600RegisterInfo.h
index 9dfb3106c6cc..f0d9644b02f2 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.h
+++ b/lib/Target/AMDGPU/R600RegisterInfo.h
@@ -27,6 +27,8 @@ struct R600RegisterInfo final : public AMDGPURegisterInfo {
R600RegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
/// \brief get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index 1279f845de0e..97bb0f0c0656 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -189,8 +189,6 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
// ----
// 13 (+1)
unsigned ReservedRegCount = 13;
- if (SPReg != AMDGPU::NoRegister)
- ++ReservedRegCount;
if (AllSGPRs.size() < ReservedRegCount)
return std::make_pair(ScratchWaveOffsetReg, SPReg);
@@ -208,13 +206,6 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
MFI->setScratchWaveOffsetReg(Reg);
ScratchWaveOffsetReg = Reg;
- } else {
- if (SPReg == AMDGPU::NoRegister)
- break;
-
- MRI.replaceRegWith(SPReg, Reg);
- MFI->setStackPtrOffsetReg(Reg);
- SPReg = Reg;
break;
}
}
@@ -223,8 +214,8 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
return std::make_pair(ScratchWaveOffsetReg, SPReg);
}
-void SIFrameLowering::emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
+void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
// Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
// specified.
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
@@ -424,6 +415,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
}
}
+void SIFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ if (MFI->isEntryFunction())
+ emitEntryFunctionPrologue(MF, MBB);
+}
+
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h
index 7ccd02b3c86a..e17adbe27361 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/lib/Target/AMDGPU/SIFrameLowering.h
@@ -26,6 +26,8 @@ public:
AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
~SIFrameLowering() override = default;
+ void emitEntryFunctionPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const;
void emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 286be355bc14..01c1f78e7ca4 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -914,6 +914,55 @@ SDValue SITargetLowering::lowerKernargMemParameter(
return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
}
+SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
+ const SDLoc &SL, SDValue Chain,
+ const ISD::InputArg &Arg) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ if (Arg.Flags.isByVal()) {
+ unsigned Size = Arg.Flags.getByValSize();
+ int FrameIdx = MFI.CreateFixedObject(Size, VA.getLocMemOffset(), false);
+ return DAG.getFrameIndex(FrameIdx, MVT::i32);
+ }
+
+ unsigned ArgOffset = VA.getLocMemOffset();
+ unsigned ArgSize = VA.getValVT().getStoreSize();
+
+ int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, true);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ SDValue ArgValue;
+
+ // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ MVT MemVT = VA.getValVT();
+
+ switch (VA.getLocInfo()) {
+ default:
+ break;
+ case CCValAssign::BCvt:
+ MemVT = VA.getLocVT();
+ break;
+ case CCValAssign::SExt:
+ ExtType = ISD::SEXTLOAD;
+ break;
+ case CCValAssign::ZExt:
+ ExtType = ISD::ZEXTLOAD;
+ break;
+ case CCValAssign::AExt:
+ ExtType = ISD::EXTLOAD;
+ break;
+ }
+
+ ArgValue = DAG.getExtLoad(
+ ExtType, SL, VA.getLocVT(), Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ MemVT);
+ return ArgValue;
+}
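
To make the LocInfo-to-load mapping concrete, a worked example with assumed types (not taken from this change):

// Hypothetical i8 argument promoted to i32 by the calling convention:
//   VA.getValVT()   == MVT::i8            (value type, also the slot size)
//   VA.getLocVT()   == MVT::i32           (type the CC says it lives in)
//   VA.getLocInfo() == CCValAssign::SExt  ->  ExtType = ISD::SEXTLOAD
// lowerStackParameter then builds, in effect:
//   t1 = FrameIndex<FI>
//   t2 = i32 sextload<i8> t1              ; one node loads and sign-extends
// so the 8-bit stack slot is widened to the 32-bit LocVT in a single load.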
+
static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
CallingConv::ID CallConv,
ArrayRef<ISD::InputArg> Ins,
@@ -1094,10 +1143,12 @@ static void allocateSystemSGPRs(CCState &CCInfo,
static void reservePrivateMemoryRegs(const TargetMachine &TM,
MachineFunction &MF,
const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
+ SIMachineFunctionInfo &Info,
+ bool NeedSP) {
// Now that we've figured out where the scratch register inputs are, see if
// we should reserve the arguments and use them directly.
- bool HasStackObjects = MF.getFrameInfo().hasStackObjects();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool HasStackObjects = MFI.hasStackObjects();
// Record that we know we have non-spill stack objects so we don't need to
// check all stack objects later.
@@ -1155,6 +1206,15 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
Info.setScratchWaveOffsetReg(ReservedOffsetReg);
}
}
+
+ if (NeedSP) {
+ unsigned ReservedStackPtrOffsetReg = TRI.reservedStackPtrOffsetReg(MF);
+ Info.setStackPtrOffsetReg(ReservedStackPtrOffsetReg);
+
+ assert(Info.getStackPtrOffsetReg() != Info.getFrameOffsetReg());
+ assert(!TRI.isSubRegister(Info.getScratchRSrcReg(),
+ Info.getStackPtrOffsetReg()));
+ }
}
SDValue SITargetLowering::LowerFormalArguments(
@@ -1223,8 +1283,10 @@ SDValue SITargetLowering::LowerFormalArguments(
!Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
!Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
!Info->hasWorkItemIDZ());
+ } else if (IsKernel) {
+ assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
} else {
- assert(!IsKernel || (Info->hasWorkGroupIDX() && Info->hasWorkItemIDX()));
+ Splits.append(Ins.begin(), Ins.end());
}
if (IsEntryFunc) {
@@ -1278,11 +1340,14 @@ SDValue SITargetLowering::LowerFormalArguments(
InVals.push_back(Arg);
continue;
+ } else if (!IsEntryFunc && VA.isMemLoc()) {
+ SDValue Val = lowerStackParameter(DAG, VA, DL, Chain, Arg);
+ InVals.push_back(Val);
+ if (!Arg.Flags.isByVal())
+ Chains.push_back(Val.getValue(1));
+ continue;
}
- if (VA.isMemLoc())
- report_fatal_error("memloc not supported with calling convention");
-
assert(VA.isRegLoc() && "Parameter must be in a register!");
unsigned Reg = VA.getLocReg();
@@ -1291,7 +1356,7 @@ SDValue SITargetLowering::LowerFormalArguments(
Reg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- if (Arg.VT.isVector()) {
+ if (IsShader && Arg.VT.isVector()) {
// Build a vector from the registers
Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
unsigned NumElements = ParamType->getVectorNumElements();
@@ -1317,16 +1382,49 @@ SDValue SITargetLowering::LowerFormalArguments(
InVals.push_back(Val);
}
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+
+ // TODO: Could maybe omit SP if only tail calls?
+ bool NeedSP = FrameInfo.hasCalls() || FrameInfo.hasVarSizedObjects();
+
// Start adding system SGPRs.
- if (IsEntryFunc)
+ if (IsEntryFunc) {
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader);
-
- reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
+ reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info, NeedSP);
+ } else {
+ CCInfo.AllocateReg(Info->getScratchRSrcReg());
+ CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
+ CCInfo.AllocateReg(Info->getFrameOffsetReg());
+
+ if (NeedSP) {
+ unsigned StackPtrReg = findFirstFreeSGPR(CCInfo);
+ CCInfo.AllocateReg(StackPtrReg);
+ Info->setStackPtrOffsetReg(StackPtrReg);
+ }
+ }
return Chains.empty() ? Chain :
DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
+// TODO: If return values can't fit in registers, we should return as many as
+// possible in registers before passing the rest on the stack.
+bool SITargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv,
+ MachineFunction &MF, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ // Replacing returns with sret/stack usage doesn't make sense for shaders.
+ // FIXME: Also sort of a workaround for custom vector splitting in LowerReturn
+ // for shaders. Vector types should be explicitly handled by CC.
+ if (AMDGPU::isEntryFunctionCC(CallConv))
+ return true;
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, IsVarArg));
+}
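
When CanLowerReturn reports false, SelectionDAG's generic code demotes the return value to a hidden sret out-parameter; answering true unconditionally for entry functions keeps shader and kernel returns out of that path. A sketch of the two outcomes, with an assumed register budget purely for illustration:

// Suppose the return CC can assign at most N 32-bit registers:
//   - a function whose return values fit: CheckReturn succeeds,
//     CanLowerReturn -> true, values come back in registers;
//   - a function returning more than N words: CheckReturn fails,
//     CanLowerReturn -> false, and the caller passes a hidden sret
//     pointer that the callee stores through instead.
// Entry functions skip the check entirely, per the early return above.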
+
SDValue
SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
@@ -1336,11 +1434,15 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- if (!AMDGPU::isShader(CallConv))
+ if (AMDGPU::isKernel(CallConv)) {
return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs,
OutVals, DL, DAG);
+ }
+
+ bool IsShader = AMDGPU::isShader(CallConv);
Info->setIfReturnsVoid(Outs.size() == 0);
+ bool IsWaveEnd = Info->returnsVoid() && IsShader;
SmallVector<ISD::OutputArg, 48> Splits;
SmallVector<SDValue, 48> SplitVals;
@@ -1349,7 +1451,7 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
const ISD::OutputArg &Out = Outs[i];
- if (Out.VT.isVector()) {
+ if (IsShader && Out.VT.isVector()) {
MVT VT = Out.VT.getVectorElementType();
ISD::OutputArg NewOut = Out;
NewOut.Flags.setSplit();
@@ -1380,29 +1482,58 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
*DAG.getContext());
// Analyze outgoing return values.
- AnalyzeReturn(CCInfo, Splits);
+ CCInfo.AnalyzeReturn(Splits, CCAssignFnForReturn(CallConv, isVarArg));
SDValue Flag;
SmallVector<SDValue, 48> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ // Add return address for callable functions.
+ if (!Info->isEntryFunction()) {
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
+ SDValue ReturnAddrReg = CreateLiveInRegister(
+ DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
+
+ // FIXME: Should be able to use a vreg here, but need a way to prevent it
+ // from being allocated to a CSR.
+
+ SDValue PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF),
+ MVT::i64);
+
+ Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, Flag);
+ Flag = Chain.getValue(1);
+
+ RetOps.push_back(PhysReturnAddrReg);
+ }
+
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
+ // TODO: Partially return in registers if return values don't fit.
SDValue Arg = SplitVals[realRVLocIdx];
// Copied from other backends.
switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ default:
+ llvm_unreachable("Unknown loc info!");
}
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
@@ -1410,12 +1541,16 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ // FIXME: Does sret work properly?
+
// Update chain and glue.
RetOps[0] = Chain;
if (Flag.getNode())
RetOps.push_back(Flag);
- unsigned Opc = Info->returnsVoid() ? AMDGPUISD::ENDPGM : AMDGPUISD::RETURN_TO_EPILOG;
+ unsigned Opc = AMDGPUISD::ENDPGM;
+ if (!IsWaveEnd)
+ Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}
@@ -2660,6 +2795,15 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue Vec = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
+ DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
+
+ // Make sure we do any optimizations that will make it easier to fold
+ // source modifiers before obscuring it with bit operations.
+
+ // XXX - Why doesn't this get called when vector_shuffle is expanded?
+ if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI))
+ return Combined;
+
if (const ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index 046e677756d1..e68837747491 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -28,6 +28,10 @@ class SITargetLowering final : public AMDGPUTargetLowering {
uint64_t Offset, bool Signed,
const ISD::InputArg *Arg = nullptr) const;
+ SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
+ const SDLoc &SL, SDValue Chain,
+ const ISD::InputArg &Arg) const;
+
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
@@ -177,7 +181,12 @@ public:
const SDLoc &DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
- SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td
index b83a1fe187eb..02c9b4b1f0ee 100644
--- a/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/lib/Target/AMDGPU/SIInstrFormats.td
@@ -228,10 +228,10 @@ class EXPe : Enc64 {
bits<1> compr;
bits<1> done;
bits<1> vm;
- bits<8> vsrc0;
- bits<8> vsrc1;
- bits<8> vsrc2;
- bits<8> vsrc3;
+ bits<8> src0;
+ bits<8> src1;
+ bits<8> src2;
+ bits<8> src3;
let Inst{3-0} = en;
let Inst{9-4} = tgt;
@@ -239,10 +239,10 @@ class EXPe : Enc64 {
let Inst{11} = done;
let Inst{12} = vm;
let Inst{31-26} = 0x3e;
- let Inst{39-32} = vsrc0;
- let Inst{47-40} = vsrc1;
- let Inst{55-48} = vsrc2;
- let Inst{63-56} = vsrc3;
+ let Inst{39-32} = src0;
+ let Inst{47-40} = src1;
+ let Inst{55-48} = src2;
+ let Inst{63-56} = src3;
}
let Uses = [EXEC] in {
diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 933a16646746..c6ad61a325cc 100644
--- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -97,9 +97,7 @@ private:
public:
static char ID;
- SILoadStoreOptimizer() : MachineFunctionPass(ID) {}
-
- SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) {
+ SILoadStoreOptimizer() : MachineFunctionPass(ID) {
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
}
@@ -129,8 +127,8 @@ char SILoadStoreOptimizer::ID = 0;
char &llvm::SILoadStoreOptimizerID = SILoadStoreOptimizer::ID;
-FunctionPass *llvm::createSILoadStoreOptimizerPass(TargetMachine &TM) {
- return new SILoadStoreOptimizer(TM);
+FunctionPass *llvm::createSILoadStoreOptimizerPass() {
+ return new SILoadStoreOptimizer();
}
static void moveInstsAfter(MachineBasicBlock::iterator I,
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index adebb8c4a1c5..18b197ddb7ae 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -80,17 +80,22 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
WavesPerEU = ST.getWavesPerEU(*F);
- // Non-entry functions have no special inputs for now.
- // TODO: Return early for non-entry CCs.
+ if (!isEntryFunction()) {
+ // Non-entry functions have no special inputs for now, other than the
+ // registers required for scratch access.
+ ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
+ ScratchWaveOffsetReg = AMDGPU::SGPR4;
+ FrameOffsetReg = AMDGPU::SGPR5;
+ return;
+ }
CallingConv::ID CC = F->getCallingConv();
- if (CC == CallingConv::AMDGPU_PS)
- PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
-
- if (AMDGPU::isKernel(CC)) {
+ if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
+ } else if (CC == CallingConv::AMDGPU_PS) {
+ PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
}
if (ST.debuggerEmitPrologue()) {
@@ -120,7 +125,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
bool MaySpill = ST.isVGPRSpillingEnabled(*F);
- bool HasStackObjects = FrameInfo.hasStackObjects();
+ bool HasStackObjects = FrameInfo.hasStackObjects() || FrameInfo.hasCalls();
if (HasStackObjects || MaySpill) {
PrivateSegmentWaveByteOffset = true;
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index dc9f509e60ae..348bb4fa0260 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -388,9 +388,8 @@ public:
void setScratchWaveOffsetReg(unsigned Reg) {
assert(Reg != AMDGPU::NoRegister && "Should never be unset");
ScratchWaveOffsetReg = Reg;
-
- // FIXME: Only for entry functions.
- FrameOffsetReg = ScratchWaveOffsetReg;
+ if (isEntryFunction())
+ FrameOffsetReg = ScratchWaveOffsetReg;
}
unsigned getQueuePtrUserSGPR() const {
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index e02c2e3240e8..4dc090d9b7ed 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include <unordered_map>
+#include <unordered_set>
using namespace llvm;
@@ -44,26 +45,29 @@ namespace {
class SDWAOperand;
class SIPeepholeSDWA : public MachineFunctionPass {
+public:
+ typedef SmallVector<SDWAOperand *, 4> SDWAOperandsVector;
+
private:
MachineRegisterInfo *MRI;
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
+ std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
Optional<int64_t> foldToImm(const MachineOperand &Op) const;
public:
static char ID;
- typedef SmallVector<std::unique_ptr<SDWAOperand>, 4> SDWAOperandsVector;
-
SIPeepholeSDWA() : MachineFunctionPass(ID) {
initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
void matchSDWAOperands(MachineFunction &MF);
+ bool isConvertibleToSDWA(const MachineInstr &MI) const;
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
StringRef getPassName() const override { return "SI Peephole SDWA"; }
@@ -468,7 +472,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
auto SDWADst =
- make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
+ make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
SDWAOperands[&MI] = std::move(SDWADst);
++NumSDWAPatternsFound;
@@ -575,8 +579,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
}
}
-bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
- const SDWAOperandsVector &SDWAOperands) {
+bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const {
// Check if this instruction can be converted to SDWA:
// 1. Does this opcode support SDWA
if (AMDGPU::getSDWAOp(MI.getOpcode()) == -1)
@@ -588,6 +591,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
return false;
}
+ return true;
+}
+
+bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
+ const SDWAOperandsVector &SDWAOperands) {
// Convert to sdwa
int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode());
assert(SDWAOpcode != -1);
@@ -664,7 +672,18 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
// Apply all SDWA operand patterns
bool Converted = false;
for (auto &Operand : SDWAOperands) {
- Converted |= Operand->convertToSDWA(*SDWAInst, TII);
+ // There should be no intersection between SDWA operands and potential MIs
+ // e.g.:
+ // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0
+ // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0
+ // v_add_u32 v3, v4, v2
+ //
+ // In that example it is possible that we would fold the 2nd instruction
+ // into the 3rd (v_add_u32_sdwa) and then try to fold the 1st instruction
+ // into the 2nd, which was already destroyed. So if an SDWAOperand is also
+ // a potential MI then do not apply it.
+ if (PotentialMatches.count(Operand->getParentInst()) == 0)
+ Converted |= Operand->convertToSDWA(*SDWAInst, TII);
}
if (!Converted) {
SDWAInst->eraseFromParent();
@@ -690,16 +709,15 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
TRI = ST.getRegisterInfo();
TII = ST.getInstrInfo();
-
- std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
-
+
+ // Find all SDWA operands in MF.
matchSDWAOperands(MF);
- for (auto &OperandPair : SDWAOperands) {
- auto &Operand = OperandPair.second;
+ for (const auto &OperandPair : SDWAOperands) {
+ const auto &Operand = OperandPair.second;
MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
- if (PotentialMI) {
- PotentialMatches[PotentialMI].push_back(std::move(Operand));
+ if (PotentialMI && isConvertibleToSDWA(*PotentialMI)) {
+ PotentialMatches[PotentialMI].push_back(Operand.get());
}
}
@@ -708,6 +726,7 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
convertToSDWA(PotentialMI, PotentialPair.second);
}
+ PotentialMatches.clear();
SDWAOperands.clear();
return false;
}
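
The SDWA rewrite keeps SDWAOperands as the single owning map and lets PotentialMatches hold borrowed raw pointers, where the old code moved the unique_ptrs out (leaving the owner map full of empty pointers). A standalone sketch of that ownership split, with hypothetical types:

#include <memory>
#include <unordered_map>
#include <vector>

struct Inst {};
struct Operand { Inst *Parent = nullptr; };

int main() {
  // Owning map: one unique_ptr per matched operand.
  std::unordered_map<Inst *, std::unique_ptr<Operand>> Owners;
  // Non-owning view: several operands may target one candidate instruction.
  std::unordered_map<Inst *, std::vector<Operand *>> Potential;

  Inst I;
  Owners[&I] = std::make_unique<Operand>();
  // .get() lends a pointer; ownership stays in Owners, so clearing the
  // view map can never double-free or leave empty owners behind.
  Potential[&I].push_back(Owners[&I].get());

  Potential.clear(); // safe: nothing owned here
  Owners.clear();    // each Operand destroyed exactly once
  return 0;
}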
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 06cfc95be96a..6fb01a09fe13 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -117,11 +117,7 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
-unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
- const MachineFunction &MF) const {
-
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- unsigned RegCount = ST.getMaxNumSGPRs(MF);
+static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
unsigned Reg;
// Try to place it in a hole after PrivateSegmentBufferReg.
@@ -134,9 +130,22 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
// wave offset before it.
Reg = RegCount - 5;
}
+
+ return Reg;
+}
+
+unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
+ const MachineFunction &MF) const {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
return AMDGPU::SGPR_32RegClass.getRegister(Reg);
}
+unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
+ const MachineFunction &MF) const {
+ return AMDGPU::SGPR32;
+}
+
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
@@ -198,15 +207,33 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
}
+ unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
+ if (StackPtrReg != AMDGPU::NoRegister) {
+ reserveRegisterTuples(Reserved, StackPtrReg);
+ assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
+ }
+
+ unsigned FrameReg = MFI->getFrameOffsetReg();
+ if (FrameReg != AMDGPU::NoRegister) {
+ reserveRegisterTuples(Reserved, FrameReg);
+ assert(!isSubRegister(ScratchRSrcReg, FrameReg));
+ }
+
return Reserved;
}
bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
- return Fn.getFrameInfo().hasStackObjects();
+ const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
+ if (Info->isEntryFunction()) {
+ const MachineFrameInfo &MFI = Fn.getFrameInfo();
+ return MFI.hasStackObjects() || MFI.hasCalls();
+ }
+
+ // May need the scavenger for dealing with callee-saved registers.
+ return true;
}
-bool
-SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
+bool SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
return MF.getFrameInfo().hasStackObjects();
}
@@ -318,8 +345,11 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
assert(FIOp && FIOp->isFI() && "frame index must be address operand");
-
assert(TII->isMUBUF(MI));
+ assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
+ MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
+ "should only be seeing frame offset relative FrameIndex");
+
MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
int64_t NewOffset = OffsetOp->getImm() + Offset;
@@ -981,12 +1011,72 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
default: {
- if (TII->isMUBUF(*MI)) {
+ const DebugLoc &DL = MI->getDebugLoc();
+ bool IsMUBUF = TII->isMUBUF(*MI);
+
+ if (!IsMUBUF &&
+ MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
+ // Convert to an absolute stack address by finding the offset from the
+ // scratch wave base and scaling by the wave size.
+ //
+ // In an entry function/kernel the stack address is already the absolute
+ // address relative to the scratch wave offset.
+
+ unsigned DiffReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
+ unsigned ResultReg = IsCopy ?
+ MI->getOperand(0).getReg() :
+ MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
+ .addReg(MFI->getFrameOffsetReg())
+ .addReg(MFI->getScratchWaveOffsetReg());
+
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ if (Offset == 0) {
+ // XXX - This never happens because of emergency scavenging slot at 0?
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
+ .addImm(Log2_32(ST.getWavefrontSize()))
+ .addReg(DiffReg);
+ } else {
+ unsigned CarryOut
+ = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ unsigned ScaledReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ // XXX - Should this use a vector shift?
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
+ .addReg(DiffReg, RegState::Kill)
+ .addImm(Log2_32(ST.getWavefrontSize()));
+
+ // TODO: Fold if the use instruction is another add of a constant.
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
+ .addReg(CarryOut, RegState::Define | RegState::Dead)
+ .addImm(Offset)
+ .addReg(ScaledReg, RegState::Kill);
+
+ MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC);
+ }
+
+ // Don't introduce an extra copy if we're just materializing in a mov.
+ if (IsCopy)
+ MI->eraseFromParent();
+ else
+ FIOp.ChangeToRegister(ResultReg, false, false, true);
+ return;
+ }
+
+ if (IsMUBUF) {
// Disable offen so we don't need a 0 vgpr base.
assert(static_cast<int>(FIOperandNum) ==
AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::vaddr));
+ assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
+ == MFI->getFrameOffsetReg());
+
int64_t Offset = FrameInfo.getObjectOffset(Index);
int64_t OldImm
= TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
@@ -995,17 +1085,19 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (isUInt<12>(NewOffset) &&
buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
MI->eraseFromParent();
- break;
+ return;
}
}
+ // If the offset is simply too big, don't convert to a scratch wave offset
+ // relative index.
+
int64_t Offset = FrameInfo.getObjectOffset(Index);
FIOp.ChangeToImmediate(Offset);
if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
- .addImm(Offset);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ .addImm(Offset);
FIOp.ChangeToRegister(TmpReg, false, false, true);
}
}
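
For non-entry functions the frame offset register is no longer the scratch wave offset, so the non-MUBUF path above rebuilds the swizzled scratch index by hand: subtract the wave offset, divide the byte difference by the wavefront size, then add the object offset. The arithmetic, with made-up register values:

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical values, in bytes.
  const uint32_t WavefrontSize  = 64;      // lanes per wave
  const uint32_t ScratchWaveOff = 0x4000;  // wave's scratch base
  const uint32_t FrameOff       = 0x4800;  // frame offset register
  const int64_t  ObjectOff      = 16;      // FrameInfo.getObjectOffset

  // Mirrors the S_SUB_U32 / S_LSHR_B32 / V_ADD_I32_e64 sequence:
  uint32_t Diff   = FrameOff - ScratchWaveOff; // 0x800 bytes for the wave
  uint32_t Scaled = Diff >> 6;                 // Log2_32(64) == 6  ->  32
  int64_t  Index  = ObjectOff + Scaled;        // per-lane byte offset

  assert(Index == 48);
  return 0;
}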
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index 679ed229758a..b91cdddc5520 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -17,6 +17,7 @@
#include "AMDGPURegisterInfo.h"
#include "SIDefines.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace llvm {
@@ -57,8 +58,16 @@ public:
unsigned reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const;
+ unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const;
+
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
+
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
+
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
@@ -228,6 +237,11 @@ public:
const int *getRegUnitPressureSets(unsigned RegUnit) const override;
+ unsigned getReturnAddressReg(const MachineFunction &MF) const {
+ // Not a callee saved register.
+ return AMDGPU::SGPR30_SGPR31;
+ }
+
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td
index 593439c2a3cd..f2d8b6f7b7a4 100644
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -186,11 +186,23 @@ def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32">;
def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64">;
def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64">;
-let isTerminator = 1, isBarrier = 1,
- isBranch = 1, isIndirectBranch = 1 in {
+let isTerminator = 1, isBarrier = 1, SchedRW = [WriteBranch] in {
+
+let isBranch = 1, isIndirectBranch = 1 in {
def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;
+} // End isBranch = 1, isIndirectBranch = 1
+
+let isReturn = 1 in {
+// Define variant marked as return rather than branch.
+def S_SETPC_B64_return : SOP1_1<"", [(AMDGPUret_flag i64:$src0)]>;
+}
+} // End isTerminator = 1, isBarrier = 1
+
+let isCall = 1 in {
+def S_SWAPPC_B64 : SOP1_64 <"s_swappc_b64">;
}
-def S_SWAPPC_B64 : SOP1_64 <"s_swappc_b64">;
+
def S_RFE_B64 : SOP1_1 <"s_rfe_b64">;
let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC] in {
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d565c84bfeda..2abd4afad3b6 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -518,7 +518,18 @@ bool isCompute(CallingConv::ID cc) {
}
bool isEntryFunctionCC(CallingConv::ID CC) {
- return true;
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_HS:
+ return true;
+ default:
+ return false;
+ }
}
bool isSI(const MCSubtargetInfo &STI) {
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index d6c836eb748b..8e74aa2cc9a8 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -262,7 +262,6 @@ bool isEntryFunctionCC(CallingConv::ID CC);
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
switch (CC) {
- case CallingConv::C:
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
return true;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 5583d6148b08..1979cbf50125 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -471,7 +471,7 @@ void ARMPassConfig::addIRPasses() {
if (TM->Options.ThreadModel == ThreadModel::Single)
addPass(createLowerAtomicPass());
else
- addPass(createAtomicExpandPass(TM));
+ addPass(createAtomicExpandPass());
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
@@ -486,7 +486,7 @@ void ARMPassConfig::addIRPasses() {
// Match interleaved memory accesses to ldN/stN intrinsics.
if (TM->getOptLevel() != CodeGenOpt::None)
- addPass(createInterleavedAccessPass(TM));
+ addPass(createInterleavedAccessPass());
}
bool ARMPassConfig::addPreISel() {
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index e4df7ff5c200..e6ea67d55b43 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -399,7 +399,7 @@ void Simplifier::Context::cleanup() {
for (Value *V : Clones) {
Instruction *U = cast<Instruction>(V);
if (!U->getParent())
- delete U;
+ U->deleteValue();
}
}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 6913d50bbcaa..8e93df6201ae 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -252,7 +252,7 @@ void HexagonPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
bool NoOpt = (getOptLevel() == CodeGenOpt::None);
- addPass(createAtomicExpandPass(TM));
+ addPass(createAtomicExpandPass());
if (!NoOpt) {
if (EnableLoopPrefetch)
addPass(createLoopDataPrefetchPass());
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 7553f3972f5d..008b9505ee26 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -23,14 +23,14 @@ namespace llvm {
class ModulePass;
class FunctionPass;
- ModulePass *createMipsOs16Pass(MipsTargetMachine &TM);
- ModulePass *createMips16HardFloatPass(MipsTargetMachine &TM);
+ ModulePass *createMipsOs16Pass();
+ ModulePass *createMips16HardFloatPass();
- FunctionPass *createMipsModuleISelDagPass(MipsTargetMachine &TM);
- FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM);
- FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsModuleISelDagPass();
+ FunctionPass *createMipsOptimizePICCallPass();
+ FunctionPass *createMipsDelaySlotFillerPass();
FunctionPass *createMipsHazardSchedule();
- FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsLongBranchPass();
FunctionPass *createMipsConstantIslandPass();
FunctionPass *createMicroMipsSizeReductionPass();
} // end namespace llvm;
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index 5a394fe02f16..3c2426129e49 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
@@ -28,14 +29,16 @@ namespace {
public:
static char ID;
- Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {}
+ Mips16HardFloat() : ModulePass(ID) {}
StringRef getPassName() const override { return "MIPS16 Hard Float Pass"; }
- bool runOnModule(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ ModulePass::getAnalysisUsage(AU);
+ }
- protected:
- const MipsTargetMachine &TM;
+ bool runOnModule(Module &M) override;
};
static void EmitInlineAsm(LLVMContext &C, BasicBlock *BB, StringRef AsmText) {
@@ -520,6 +523,8 @@ static void removeUseSoftFloat(Function &F) {
// during call lowering but it should be moved here in the future.
//
bool Mips16HardFloat::runOnModule(Module &M) {
+ auto &TM = static_cast<const MipsTargetMachine &>(
+ getAnalysis<TargetPassConfig>().getTM<TargetMachine>());
DEBUG(errs() << "Run on Module Mips16HardFloat\n");
bool Modified = false;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
@@ -541,6 +546,6 @@ bool Mips16HardFloat::runOnModule(Module &M) {
}
-ModulePass *llvm::createMips16HardFloatPass(MipsTargetMachine &TM) {
- return new Mips16HardFloat(TM);
+ModulePass *llvm::createMips16HardFloatPass() {
+ return new Mips16HardFloat();
}
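
MipsModuleDAGToDAGISel below makes the same move: instead of a TargetMachine constructor argument, the pass declares a TargetPassConfig dependency and queries it at run time. A minimal sketch of the idiom (ExampleModulePass is a hypothetical name):

#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

namespace {
class ExampleModulePass : public ModulePass {
public:
  static char ID;
  ExampleModulePass() : ModulePass(ID) {}

  // Declare the dependency so the legacy pass manager makes the
  // TargetPassConfig immutable pass available before this pass runs.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    ModulePass::getAnalysisUsage(AU);
  }

  bool runOnModule(Module &M) override {
    // Recover the TargetMachine at run time rather than capturing it
    // in the constructor.
    const TargetMachine &TM =
        getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
    (void)TM;
    return false;
  }
};
} // end anonymous namespace

char ExampleModulePass::ID = 0;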
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 1597057ad63f..5d82571ff94f 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -211,12 +211,12 @@ namespace {
class Filler : public MachineFunctionPass {
public:
- Filler(TargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm) { }
+ Filler() : MachineFunctionPass(ID), TM(nullptr) {}
StringRef getPassName() const override { return "Mips Delay Slot Filler"; }
bool runOnMachineFunction(MachineFunction &F) override {
+ TM = &F.getTarget();
bool Changed = false;
for (MachineFunction::iterator FI = F.begin(), FE = F.end();
FI != FE; ++FI)
@@ -290,7 +290,7 @@ namespace {
bool terminateSearch(const MachineInstr &Candidate) const;
- TargetMachine &TM;
+ const TargetMachine *TM;
static char ID;
};
@@ -610,7 +610,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
Changed = true;
// Delay slot filling is disabled at -O0.
- if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) {
+ if (!DisableDelaySlotFiller && (TM->getOptLevel() != CodeGenOpt::None)) {
bool Filled = false;
if (MipsCompactBranchPolicy.getValue() != CB_Always ||
@@ -910,6 +910,4 @@ bool Filler::terminateSearch(const MachineInstr &Candidate) const {
/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
/// slots in Mips MachineFunctions
-FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
- return new Filler(tm);
-}
+FunctionPass *llvm::createMipsDelaySlotFillerPass() { return new Filler(); }
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 78bae6954c3c..3641a70d61b5 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -795,7 +795,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
SDValue And0 = N->getOperand(0), And1 = N->getOperand(1);
uint64_t SMPos0, SMSize0, SMPos1, SMSize1;
- ConstantSDNode *CN;
+ ConstantSDNode *CN, *CN1;
// See if Op's first operand matches (and $src1 , mask0).
if (And0.getOpcode() != ISD::AND)
@@ -806,37 +806,74 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// See if Op's second operand matches (and (shl $src, pos), mask1).
- if (And1.getOpcode() != ISD::AND)
- return SDValue();
+ if (And1.getOpcode() == ISD::AND &&
+ And1.getOperand(0).getOpcode() == ISD::SHL) {
- if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
- !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
- return SDValue();
+ if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
+ !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
+ return SDValue();
- // The shift masks must have the same position and size.
- if (SMPos0 != SMPos1 || SMSize0 != SMSize1)
- return SDValue();
+ // The shift masks must have the same position and size.
+ if (SMPos0 != SMPos1 || SMSize0 != SMSize1)
+ return SDValue();
- SDValue Shl = And1.getOperand(0);
- if (Shl.getOpcode() != ISD::SHL)
- return SDValue();
+ SDValue Shl = And1.getOperand(0);
- if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1))))
- return SDValue();
+ if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1))))
+ return SDValue();
- unsigned Shamt = CN->getZExtValue();
+ unsigned Shamt = CN->getZExtValue();
- // Return if the shift amount and the first bit position of mask are not the
- // same.
- EVT ValTy = N->getValueType(0);
- if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits()))
- return SDValue();
+ // Return if the shift amount and the first bit position of the mask are
+ // not the same.
+ EVT ValTy = N->getValueType(0);
+ if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits()))
+ return SDValue();
- SDLoc DL(N);
- return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0),
- DAG.getConstant(SMPos0, DL, MVT::i32),
- DAG.getConstant(SMSize0, DL, MVT::i32),
- And0.getOperand(0));
+ SDLoc DL(N);
+ return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0),
+ DAG.getConstant(SMPos0, DL, MVT::i32),
+ DAG.getConstant(SMSize0, DL, MVT::i32),
+ And0.getOperand(0));
+ } else {
+ // Pattern match DINS.
+ // $dst = or (and $src, mask0), mask1
+ // where mask0 = ((1 << SMSize0) - 1) << SMPos0
+ // => dins $dst, $src, pos, size
+ if (~CN->getSExtValue() == ((((int64_t)1 << SMSize0) - 1) << SMPos0) &&
+ ((SMSize0 + SMPos0 <= 64 && Subtarget.hasMips64r2()) ||
+ (SMSize0 + SMPos0 <= 32))) {
+ // Check if the AND instruction has a constant as its argument
+ bool isConstCase = And1.getOpcode() != ISD::AND;
+ if (And1.getOpcode() == ISD::AND) {
+ if (!(CN1 = dyn_cast<ConstantSDNode>(And1->getOperand(1))))
+ return SDValue();
+ } else {
+ if (!(CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1))))
+ return SDValue();
+ }
+ SDLoc DL(N);
+ EVT ValTy = N->getOperand(0)->getValueType(0);
+ SDValue Const1;
+ SDValue SrlX;
+ if (!isConstCase) {
+ Const1 = DAG.getConstant(SMPos0, DL, MVT::i32);
+ SrlX = DAG.getNode(ISD::SRL, DL, And1->getValueType(0), And1, Const1);
+ }
+ return DAG.getNode(
+ MipsISD::Ins, DL, N->getValueType(0),
+ isConstCase
+ ? DAG.getConstant(CN1->getSExtValue() >> SMPos0, DL, ValTy)
+ : SrlX,
+ DAG.getConstant(SMPos0, DL, MVT::i32),
+ DAG.getConstant(ValTy.getSizeInBits() / 8 < 8 ? SMSize0 & 31
+ : SMSize0,
+ DL, MVT::i32),
+ And0->getOperand(0));
+
+ }
+ return SDValue();
+ }
}
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
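
The new branch recognizes a bitfield insert: mask0 clears a contiguous size-bit field at position pos in $src, and the or supplies the replacement bits — exactly what a single dins performs. A scalar restatement of the identity, checked with arbitrary values:

#include <cassert>
#include <cstdint>

// Insert the low `size` bits of `field` into `dst` at bit `pos` - the
// operation one MIPS dins instruction performs.
static uint64_t insertBits(uint64_t dst, uint64_t field,
                           unsigned pos, unsigned size) {
  const uint64_t mask = ((uint64_t(1) << size) - 1) << pos;
  // or (and dst, ~mask), ((field << pos) & mask) == dins dst, field, pos, size
  return (dst & ~mask) | ((field << pos) & mask);
}

int main() {
  assert(insertBits(0xFFFFFFFFULL, 0x5, /*pos=*/8, /*size=*/4)
         == 0xFFFFF5FFULL);
  return 0;
}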
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 100503700a72..b95f1158fa56 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -75,9 +75,8 @@ namespace {
public:
static char ID;
- MipsLongBranch(TargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm), IsPIC(TM.isPositionIndependent()),
- ABI(static_cast<const MipsTargetMachine &>(TM).getABI()) {}
+ MipsLongBranch()
+ : MachineFunctionPass(ID), ABI(MipsABIInfo::Unknown()) {}
StringRef getPassName() const override { return "Mips Long Branch"; }
@@ -96,7 +95,6 @@ namespace {
MachineBasicBlock *MBBOpnd);
void expandToLongBranch(MBBInfo &Info);
- const TargetMachine &TM;
MachineFunction *MF;
SmallVector<MBBInfo, 16> MBBInfos;
bool IsPIC;
@@ -469,6 +467,12 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
static_cast<const MipsSubtarget &>(F.getSubtarget());
const MipsInstrInfo *TII =
static_cast<const MipsInstrInfo *>(STI.getInstrInfo());
+
+
+ const TargetMachine &TM = F.getTarget();
+ IsPIC = TM.isPositionIndependent();
+ ABI = static_cast<const MipsTargetMachine &>(TM).getABI();
+
LongBranchSeqSize =
!IsPIC ? 2 : (ABI.IsN64() ? 10 : (!STI.isTargetNaCl() ? 9 : 10));
@@ -541,6 +545,4 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
/// createMipsLongBranchPass - Returns a pass that converts branches to long
/// branches.
-FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) {
- return new MipsLongBranch(tm);
-}
+FunctionPass *llvm::createMipsLongBranchPass() { return new MipsLongBranch(); }
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
index cf85eb3f2416..ceacaa498389 100644
--- a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -10,6 +10,7 @@
#include "Mips.h"
#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -22,18 +23,19 @@ namespace {
public:
static char ID;
- explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_)
- : MachineFunctionPass(ID), TM(TM_) {}
+ MipsModuleDAGToDAGISel() : MachineFunctionPass(ID) {}
// Pass Name
StringRef getPassName() const override {
return "MIPS DAG->DAG Pattern Instruction Selection";
}
- bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
- protected:
- MipsTargetMachine &TM;
+ bool runOnMachineFunction(MachineFunction &MF) override;
};
char MipsModuleDAGToDAGISel::ID = 0;
@@ -41,10 +43,12 @@ namespace {
bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
DEBUG(errs() << "In MipsModuleDAGToDAGISel::runOnMachineFunction\n");
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ auto &TM = TPC.getTM<MipsTargetMachine>();
TM.resetSubtarget(&MF);
return false;
}
-llvm::FunctionPass *llvm::createMipsModuleISelDagPass(MipsTargetMachine &TM) {
- return new MipsModuleDAGToDAGISel(TM);
+llvm::FunctionPass *llvm::createMipsModuleISelDagPass() {
+ return new MipsModuleDAGToDAGISel();
}
diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp
index f8d9c34556bc..94a1965f9ffb 100644
--- a/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -59,7 +59,7 @@ private:
class OptimizePICCall : public MachineFunctionPass {
public:
- OptimizePICCall(TargetMachine &tm) : MachineFunctionPass(ID) {}
+ OptimizePICCall() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "Mips OptimizePICCall"; }
@@ -297,6 +297,6 @@ void OptimizePICCall::incCntAndSetReg(ValueType Entry, unsigned Reg) {
}
/// Return an OptimizeCall object.
-FunctionPass *llvm::createMipsOptimizePICCallPass(MipsTargetMachine &TM) {
- return new OptimizePICCall(TM);
+FunctionPass *llvm::createMipsOptimizePICCallPass() {
+ return new OptimizePICCall();
}
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 670b6c96e78e..70ead5cde6fa 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -155,6 +155,4 @@ bool MipsOs16::runOnModule(Module &M) {
return modified;
}
-ModulePass *llvm::createMipsOs16Pass(MipsTargetMachine &TM) {
- return new MipsOs16;
-}
+ModulePass *llvm::createMipsOs16Pass() { return new MipsOs16(); }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 29a38fd35c1f..092de216e9b8 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -154,6 +154,11 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const {
bool hasNoMips16Attr =
!F.getFnAttribute("nomips16").hasAttribute(Attribute::None);
+ bool HasMicroMipsAttr =
+ !F.getFnAttribute("micromips").hasAttribute(Attribute::None);
+ bool HasNoMicroMipsAttr =
+ !F.getFnAttribute("nomicromips").hasAttribute(Attribute::None);
+
// FIXME: This is related to the code below to reset the target options,
// we need to know whether or not the soft float flag is set on the
// function, so we can enable it as a subtarget feature.
@@ -165,6 +170,10 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const {
FS += FS.empty() ? "+mips16" : ",+mips16";
else if (hasNoMips16Attr)
FS += FS.empty() ? "-mips16" : ",-mips16";
+ if (HasMicroMipsAttr)
+ FS += FS.empty() ? "+micromips" : ",+micromips";
+ else if (HasNoMicroMipsAttr)
+ FS += FS.empty() ? "-micromips" : ",-micromips";
if (softFloat)
FS += FS.empty() ? "+soft-float" : ",+soft-float";
@@ -223,23 +232,23 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
void MipsPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
- addPass(createAtomicExpandPass(&getMipsTargetMachine()));
+ addPass(createAtomicExpandPass());
if (getMipsSubtarget().os16())
- addPass(createMipsOs16Pass(getMipsTargetMachine()));
+ addPass(createMipsOs16Pass());
if (getMipsSubtarget().inMips16HardFloat())
- addPass(createMips16HardFloatPass(getMipsTargetMachine()));
+ addPass(createMips16HardFloatPass());
}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
bool MipsPassConfig::addInstSelector() {
- addPass(createMipsModuleISelDagPass(getMipsTargetMachine()));
+ addPass(createMipsModuleISelDagPass());
addPass(createMips16ISelDag(getMipsTargetMachine(), getOptLevel()));
addPass(createMipsSEISelDag(getMipsTargetMachine(), getOptLevel()));
return false;
}
void MipsPassConfig::addPreRegAlloc() {
- addPass(createMipsOptimizePICCallPass(getMipsTargetMachine()));
+ addPass(createMipsOptimizePICCallPass());
}
TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() {
@@ -259,15 +268,14 @@ TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() {
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
void MipsPassConfig::addPreEmitPass() {
- MipsTargetMachine &TM = getMipsTargetMachine();
addPass(createMicroMipsSizeReductionPass());
// The delay slot filler pass can potentially create forbidden slot (FS)
// hazards for MIPSR6 which the hazard schedule pass (HSP) will fix. Any
// (new) pass that creates compact branches after the HSP must handle FS
// hazards itself or be pipelined before the HSP.
- addPass(createMipsDelaySlotFillerPass(TM));
+ addPass(createMipsDelaySlotFillerPass());
addPass(createMipsHazardSchedule());
- addPass(createMipsLongBranchPass(TM));
+ addPass(createMipsLongBranchPass());
addPass(createMipsConstantIslandPass());
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 144aea850833..e65b1f1aa0a5 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -689,6 +689,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SRA, MVT::v2i64, Legal);
setOperationAction(ISD::SRL, MVT::v2i64, Legal);
+ // 128-bit shifts can be accomplished via 3 instructions for SHL and
+ // SRL, but not for SRA because of the instructions available:
+ // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
+ // worth doing.
+ setOperationAction(ISD::SHL, MVT::v1i128, Expand);
+ setOperationAction(ISD::SRL, MVT::v1i128, Expand);
+ setOperationAction(ISD::SRA, MVT::v1i128, Expand);
+
setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
}
else {
@@ -742,6 +750,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasP9Vector()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ // 128-bit shifts can be accomplished via 3 instructions for SHL and
+ // SRL, but not for SRA because of the instructions available:
+ // VS{RL} and VS{RL}O.
+ setOperationAction(ISD::SHL, MVT::v1i128, Legal);
+ setOperationAction(ISD::SRL, MVT::v1i128, Legal);
+ setOperationAction(ISD::SRA, MVT::v1i128, Expand);
}
}
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index e14d18fd5433..5465b5f2d66c 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -987,12 +987,16 @@ def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
(v8i16 (VSLH $vA, $vB))>;
def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
(v4i32 (VSLW $vA, $vB))>;
+def : Pat<(v1i128 (shl v1i128:$vA, v1i128:$vB)),
+ (v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>;
def : Pat<(v16i8 (PPCshl v16i8:$vA, v16i8:$vB)),
(v16i8 (VSLB $vA, $vB))>;
def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)),
(v8i16 (VSLH $vA, $vB))>;
def : Pat<(v4i32 (PPCshl v4i32:$vA, v4i32:$vB)),
(v4i32 (VSLW $vA, $vB))>;
+def : Pat<(v1i128 (PPCshl v1i128:$vA, v1i128:$vB)),
+ (v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>;
def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
(v16i8 (VSRB $vA, $vB))>;
@@ -1000,12 +1004,16 @@ def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
(v8i16 (VSRH $vA, $vB))>;
def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
(v4i32 (VSRW $vA, $vB))>;
+def : Pat<(v1i128 (srl v1i128:$vA, v1i128:$vB)),
+ (v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>;
def : Pat<(v16i8 (PPCsrl v16i8:$vA, v16i8:$vB)),
(v16i8 (VSRB $vA, $vB))>;
def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)),
(v8i16 (VSRH $vA, $vB))>;
def : Pat<(v4i32 (PPCsrl v4i32:$vA, v4i32:$vB)),
(v4i32 (VSRW $vA, $vB))>;
+def : Pat<(v1i128 (PPCsrl v1i128:$vA, v1i128:$vB)),
+ (v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>;
def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
(v16i8 (VSRAB $vA, $vB))>;
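
The three-instruction sequence works because vslo/vsro shift the whole register by the octet part of the count while vsl/vsr shift by the remaining 0-7 bits, and vspltb 15 replicates the count byte across the register as vsl requires. The decomposition on a scalar 128-bit type (assuming the GCC/Clang __int128 extension):

#include <cassert>

// Split a 128-bit left shift into a byte-granular shift followed by a
// bit-granular one, mirroring the VSLO + VSL pair in the patterns above.
static unsigned __int128 shl128(unsigned __int128 x, unsigned amt) {
  unsigned __int128 bytes = x << (amt & 0x78); // VSLO: amt / 8 octets
  return bytes << (amt & 0x7);                 // VSL:  amt % 8 bits
}

int main() {
  const unsigned __int128 one = 1;
  for (unsigned amt = 0; amt < 128; ++amt)
    assert(shl128(one, amt) == one << amt);
  return 0;
}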
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 3afcec1248d5..46f103141bc1 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1533,6 +1533,8 @@ bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case PPC::FCMPUD:
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = MI.getOperand(2).getReg();
+ Value = 0;
+ Mask = 0;
return true;
}
}
@@ -1591,9 +1593,12 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// We can perform this optimization, equality only, if MI is
// zero-extending.
+ // FIXME: Other possible target instructions include ANDISo and
+ // RLWINM aliases, such as ROTRWI, EXTLWI, SLWI and SRWI.
if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo ||
MIOpC == PPC::SLW || MIOpC == PPC::SLWo ||
MIOpC == PPC::SRW || MIOpC == PPC::SRWo ||
+ MIOpC == PPC::ANDIo ||
isZeroExtendingRotate) {
noSub = true;
equalityOnly = true;
@@ -1641,6 +1646,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
break;
}
+ SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
+ SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;
+
// There are two possible candidates which can be changed to set CR[01].
// One is MI, the other is a SUB instruction.
// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
@@ -1652,9 +1660,37 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// same BB as the comparison. This is to allow the check below to avoid calls
// (and other explicit clobbers); instead we should really check for these
// more explicitly (in at least a few predecessors).
- else if (MI->getParent() != CmpInstr.getParent() || Value != 0) {
- // PPC does not have a record-form SUBri.
+ else if (MI->getParent() != CmpInstr.getParent())
return false;
+ else if (Value != 0) {
+    // The record-form instructions set the CR bit based on a signed
+    // comparison against 0. We try to convert a compare against 1 or -1
+    // into a compare against 0.
+ bool Success = false;
+ if (!equalityOnly && MRI->hasOneUse(CRReg)) {
+ MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
+ if (UseMI->getOpcode() == PPC::BCC) {
+ PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
+ int16_t Immed = (int16_t)Value;
+
+ if (Immed == -1 && Pred == PPC::PRED_GT) {
+          // We convert "greater than -1" into "greater than or equal to 0",
+          // since the !equalityOnly path only deals with signed comparisons.
+ PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
+ PPC::PRED_GE));
+ Success = true;
+        } else if (Immed == 1 && Pred == PPC::PRED_LT) {
+ // We convert "less than 1" into "less than or equal to 0".
+ PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
+ PPC::PRED_LE));
+ Success = true;
+ }
+ }
+ }
+
+ // PPC does not have a record-form SUBri.
+ if (!Success)
+ return false;
}
// Search for Sub.
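An illustrative before/after for the new Value != 0 handling above (assembly is a sketch, not patch output; add. stands in for any record-form, non-zero-extending definition of r3):

  add.  r3, r4, r5     ; record form: cr0 = signed compare of r3 against 0
  cmpwi cr1, r3, 1
  blt   cr1, target    ; branch if r3 < 1
  =>
  add.  r3, r4, r5
  ble   cr0, target    ; branch if r3 <= 0 (PRED_LT rewritten to PRED_LE)

The rewrite is attempted only when the CR result has a single BCC user, per the MRI->hasOneUse(CRReg) guard.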
@@ -1720,15 +1756,14 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (NewOpC == -1)
return false;
- SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
- SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;
-
// If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
// needs to be updated to be based on SUB. Push the condition code
// operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
// condition code of these operands will be modified.
+  // Here, Value == 0 means we have not converted a compare against 1 or -1
+  // into a compare against 0; that conversion would modify the predicate,
+  // so swapping is not safe afterwards.
bool ShouldSwap = false;
- if (Sub) {
+ if (Sub && Value == 0) {
ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
Sub->getOperand(2).getReg() == SrcReg;
@@ -1765,6 +1800,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
} else // We need to abort on a user we don't understand.
return false;
}
+  assert(!(Value != 0 && ShouldSwap) &&
+         "Non-zero immediate support and ShouldSwap "
+         "may conflict in updating predicate");
// Create a new virtual register to hold the value of the CR set by the
// record-form instruction. If the instruction was not previously in
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 7806d45b5457..ddae5befee3e 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -322,7 +322,7 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
void PPCPassConfig::addIRPasses() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createPPCBoolRetToIntPass());
- addPass(createAtomicExpandPass(&getPPCTargetMachine()));
+ addPass(createAtomicExpandPass());
// For the BG/Q (or if explicitly requested), add explicit data prefetch
// intrinsics.
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 6f9cc314e376..df819ccd15db 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -96,7 +96,7 @@ namespace {
/// createSparcDelaySlotFillerPass - Returns a pass that fills in delay
/// slots in Sparc MachineFunctions
///
-FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
+FunctionPass *llvm::createSparcDelaySlotFillerPass() {
return new Filler;
}
diff --git a/lib/Target/Sparc/LeonPasses.cpp b/lib/Target/Sparc/LeonPasses.cpp
index 0acc2875daa8..ca6a0dc3c2a3 100755
--- a/lib/Target/Sparc/LeonPasses.cpp
+++ b/lib/Target/Sparc/LeonPasses.cpp
@@ -21,9 +21,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-LEONMachineFunctionPass::LEONMachineFunctionPass(TargetMachine &tm, char &ID)
- : MachineFunctionPass(ID) {}
-
LEONMachineFunctionPass::LEONMachineFunctionPass(char &ID)
: MachineFunctionPass(ID) {}
@@ -72,8 +69,7 @@ int LEONMachineFunctionPass::getUnusedFPRegister(MachineRegisterInfo &MRI) {
//
char InsertNOPLoad::ID = 0;
-InsertNOPLoad::InsertNOPLoad(TargetMachine &tm)
- : LEONMachineFunctionPass(tm, ID) {}
+InsertNOPLoad::InsertNOPLoad() : LEONMachineFunctionPass(ID) {}
bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<SparcSubtarget>();
@@ -114,7 +110,7 @@ bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) {
//
char FixFSMULD::ID = 0;
-FixFSMULD::FixFSMULD(TargetMachine &tm) : LEONMachineFunctionPass(tm, ID) {}
+FixFSMULD::FixFSMULD() : LEONMachineFunctionPass(ID) {}
bool FixFSMULD::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<SparcSubtarget>();
@@ -203,8 +199,7 @@ bool FixFSMULD::runOnMachineFunction(MachineFunction &MF) {
//
char ReplaceFMULS::ID = 0;
-ReplaceFMULS::ReplaceFMULS(TargetMachine &tm)
- : LEONMachineFunctionPass(tm, ID) {}
+ReplaceFMULS::ReplaceFMULS() : LEONMachineFunctionPass(ID) {}
bool ReplaceFMULS::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<SparcSubtarget>();
@@ -287,8 +282,7 @@ bool ReplaceFMULS::runOnMachineFunction(MachineFunction &MF) {
char DetectRoundChange::ID = 0;
-DetectRoundChange::DetectRoundChange(TargetMachine &tm)
- : LEONMachineFunctionPass(tm, ID) {}
+DetectRoundChange::DetectRoundChange() : LEONMachineFunctionPass(ID) {}
bool DetectRoundChange::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<SparcSubtarget>();
@@ -338,8 +332,7 @@ bool DetectRoundChange::runOnMachineFunction(MachineFunction &MF) {
//
char FixAllFDIVSQRT::ID = 0;
-FixAllFDIVSQRT::FixAllFDIVSQRT(TargetMachine &tm)
- : LEONMachineFunctionPass(tm, ID) {}
+FixAllFDIVSQRT::FixAllFDIVSQRT() : LEONMachineFunctionPass(ID) {}
bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<SparcSubtarget>();
diff --git a/lib/Target/Sparc/LeonPasses.h b/lib/Target/Sparc/LeonPasses.h
index 2158cb636bfc..99cdfc4589ef 100755
--- a/lib/Target/Sparc/LeonPasses.h
+++ b/lib/Target/Sparc/LeonPasses.h
@@ -32,7 +32,6 @@ protected:
std::vector<int> UsedRegisters;
protected:
- LEONMachineFunctionPass(TargetMachine &tm, char &ID);
LEONMachineFunctionPass(char &ID);
int GetRegIndexForOperand(MachineInstr &MI, int OperandIndex);
@@ -48,7 +47,7 @@ class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass {
public:
static char ID;
- InsertNOPLoad(TargetMachine &tm);
+ InsertNOPLoad();
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
@@ -62,7 +61,7 @@ class LLVM_LIBRARY_VISIBILITY FixFSMULD : public LEONMachineFunctionPass {
public:
static char ID;
- FixFSMULD(TargetMachine &tm);
+ FixFSMULD();
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
@@ -74,7 +73,7 @@ class LLVM_LIBRARY_VISIBILITY ReplaceFMULS : public LEONMachineFunctionPass {
public:
static char ID;
- ReplaceFMULS(TargetMachine &tm);
+ ReplaceFMULS();
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
@@ -89,7 +88,7 @@ class LLVM_LIBRARY_VISIBILITY DetectRoundChange
public:
static char ID;
- DetectRoundChange(TargetMachine &tm);
+ DetectRoundChange();
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
@@ -102,7 +101,7 @@ class LLVM_LIBRARY_VISIBILITY FixAllFDIVSQRT : public LEONMachineFunctionPass {
public:
static char ID;
- FixAllFDIVSQRT(TargetMachine &tm);
+ FixAllFDIVSQRT();
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index 0a8272d89297..4135e4e1b61d 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -28,7 +28,7 @@ namespace llvm {
class MachineInstr;
FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
- FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createSparcDelaySlotFillerPass();
void LowerSparcMachineInstrToMCInst(const MachineInstr *MI,
MCInst &OutMI,
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 4ae64062d9e2..1da4d3604304 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -132,7 +132,7 @@ TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) {
}
void SparcPassConfig::addIRPasses() {
- addPass(createAtomicExpandPass(&getSparcTargetMachine()));
+ addPass(createAtomicExpandPass());
TargetPassConfig::addIRPasses();
}
@@ -143,26 +143,26 @@ bool SparcPassConfig::addInstSelector() {
}
void SparcPassConfig::addPreEmitPass(){
- addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine()));
+ addPass(createSparcDelaySlotFillerPass());
if (this->getSparcTargetMachine().getSubtargetImpl()->insertNOPLoad())
{
- addPass(new InsertNOPLoad(getSparcTargetMachine()));
+ addPass(new InsertNOPLoad());
}
if (this->getSparcTargetMachine().getSubtargetImpl()->fixFSMULD())
{
- addPass(new FixFSMULD(getSparcTargetMachine()));
+ addPass(new FixFSMULD());
}
if (this->getSparcTargetMachine().getSubtargetImpl()->replaceFMULS())
{
- addPass(new ReplaceFMULS(getSparcTargetMachine()));
+ addPass(new ReplaceFMULS());
}
if (this->getSparcTargetMachine().getSubtargetImpl()->detectRoundChange()) {
- addPass(new DetectRoundChange(getSparcTargetMachine()));
+ addPass(new DetectRoundChange());
}
if (this->getSparcTargetMachine().getSubtargetImpl()->fixAllFDIVSQRT())
{
- addPass(new FixAllFDIVSQRT(getSparcTargetMachine()));
+ addPass(new FixAllFDIVSQRT());
}
}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 44c794ef5da1..b974681fb6af 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -173,7 +173,7 @@ void WebAssemblyPassConfig::addIRPasses() {
else
// Expand some atomic operations. WebAssemblyTargetLowering has hooks which
// control specifically what gets lowered.
- addPass(createAtomicExpandPass(TM));
+ addPass(createAtomicExpandPass());
// Fix function bitcasts, as WebAssembly requires caller and callee signatures
// to match.
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 784c3a6557ff..3a421fe77392 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -235,6 +235,8 @@ def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
+def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
+ "LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
def FeatureSoftFloat
@@ -480,6 +482,7 @@ def SNBFeatures : ProcessorFeatures<[], [
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureLAHFSAHF,
+ FeatureSlow3OpsLEA,
FeatureFastScalarFSQRT,
FeatureFastSHLDRotate
]>;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 6781d761a1c4..7d146d050a5c 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -73,8 +73,8 @@ def CC_#NAME : CallingConv<[
CCIfSubtarget<"is64Bit()", CCIfByVal<CCPassByVal<8, 8>>>,
CCIfByVal<CCPassByVal<4, 4>>,
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16/v1i1 arguments to i32.
+ CCIfType<[i1, i8, i16, v1i1], CCPromoteToType<i32>>,
// Promote v8i1/v16i1/v32i1 arguments to i32.
CCIfType<[v8i1, v16i1, v32i1], CCPromoteToType<i32>>,
@@ -146,8 +146,8 @@ def CC_#NAME : CallingConv<[
]>;
def RetCC_#NAME : CallingConv<[
- // Promote i1, v8i1 arguments to i8.
- CCIfType<[i1, v8i1], CCPromoteToType<i8>>,
+ // Promote i1, v1i1, v8i1 arguments to i8.
+ CCIfType<[i1, v1i1, v8i1], CCPromoteToType<i8>>,
// Promote v16i1 arguments to i16.
CCIfType<[v16i1], CCPromoteToType<i16>>,
@@ -207,6 +207,7 @@ def RetCC_X86Common : CallingConv<[
//
// For code that doesn't care about the ABI, we allow returning more than two
// integer values in registers.
+ CCIfType<[v1i1], CCPromoteToType<i8>>,
CCIfType<[i1], CCPromoteToType<i8>>,
CCIfType<[i8] , CCAssignToReg<[AL, DL, CL]>>,
CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
@@ -375,6 +376,7 @@ def RetCC_X86_64_Swift : CallingConv<[
CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R12]>>>,
// For integers, ECX, R8D can be used as extra return registers.
+ CCIfType<[v1i1], CCPromoteToType<i8>>,
CCIfType<[i1], CCPromoteToType<i8>>,
CCIfType<[i8] , CCAssignToReg<[AL, DL, CL, R8B]>>,
CCIfType<[i16], CCAssignToReg<[AX, DX, CX, R8W]>>,
@@ -485,8 +487,8 @@ def CC_X86_64_C : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<8, 8>>,
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16/v1i1 arguments to i32.
+ CCIfType<[i1, i8, i16, v1i1], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in R10.
CCIfNest<CCIfSubtarget<"isTarget64BitILP32()", CCAssignToReg<[R10D]>>>,
@@ -584,8 +586,8 @@ def CC_X86_Win64_C : CallingConv<[
// FIXME: Handle byval stuff.
// FIXME: Handle varargs.
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16/v1i1 arguments to i32.
+ CCIfType<[i1, i8, i16, v1i1], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in R10.
CCIfNest<CCAssignToReg<[R10]>>,
@@ -796,8 +798,8 @@ def CC_X86_32_Common : CallingConv<[
]>;
def CC_X86_32_C : CallingConv<[
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16/v1i1 arguments to i32.
+ CCIfType<[i1, i8, i16, v1i1], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in ECX.
CCIfNest<CCAssignToReg<[ECX]>>,
@@ -816,8 +818,8 @@ def CC_X86_32_MCU : CallingConv<[
// puts arguments in registers.
CCIfByVal<CCPassByVal<4, 4>>,
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16/v1i1 arguments to i32.
+ CCIfType<[i1, i8, i16, v1i1], CCPromoteToType<i32>>,
// If the call is not a vararg call, some arguments may be passed
// in integer registers.
@@ -828,8 +830,8 @@ def CC_X86_32_MCU : CallingConv<[
]>;
def CC_X86_32_FastCall : CallingConv<[
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16/v1i1 arguments to i32.
+ CCIfType<[i1, i8, i16, v1i1], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in EAX.
CCIfNest<CCAssignToReg<[EAX]>>,
@@ -858,15 +860,15 @@ def CC_X86_32_ThisCall_Common : CallingConv<[
]>;
def CC_X86_32_ThisCall_Mingw : CallingConv<[
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16/v1i1 arguments to i32.
+ CCIfType<[i1, i8, i16, v1i1], CCPromoteToType<i32>>,
CCDelegateTo<CC_X86_32_ThisCall_Common>
]>;
def CC_X86_32_ThisCall_Win : CallingConv<[
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16/v1i1 arguments to i32.
+ CCIfType<[i1, i8, i16, v1i1], CCPromoteToType<i32>>,
// Pass sret arguments indirectly through stack.
CCIfSRet<CCAssignToStack<4, 4>>,
@@ -885,8 +887,8 @@ def CC_X86_32_FastCC : CallingConv<[
// puts arguments in registers.
CCIfByVal<CCPassByVal<4, 4>>,
- // Promote i1/i8/i16 arguments to i32.
- CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ // Promote i1/i8/i16/v1i1 arguments to i32.
+ CCIfType<[i1, i8, i16, v1i1], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in EAX.
CCIfNest<CCAssignToReg<[EAX]>>,
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index fc3b4836c178..3cfb924abd01 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -3647,13 +3647,6 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
case MVT::i1:
- if (Subtarget->hasAVX512()) {
- // Need to copy to a VK1 register.
- unsigned ResultReg = createResultReg(&X86::VK1RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg);
- return ResultReg;
- }
case MVT::i8:
return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
X86::sub_8bit);
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 2cd4c1a3e7b3..9f649dad8bc0 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -27,20 +27,26 @@
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
-#define DEBUG_TYPE "x86-fixup-LEAs"
+namespace llvm {
+void initializeFixupLEAPassPass(PassRegistry &);
+}
+
+#define FIXUPLEA_DESC "X86 LEA Fixup"
+#define FIXUPLEA_NAME "x86-fixup-LEAs"
+
+#define DEBUG_TYPE FIXUPLEA_NAME
STATISTIC(NumLEAs, "Number of LEA instructions created");
namespace {
class FixupLEAPass : public MachineFunctionPass {
enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
- static char ID;
+
/// \brief Loop over all of the instructions in the basic block
/// replacing applicable instructions with LEA instructions,
/// where appropriate.
bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
- StringRef getPassName() const override { return "X86 LEA Fixup"; }
/// \brief Given a machine register, look for the instruction
/// which writes it in the current basic block. If found,
@@ -62,6 +68,22 @@ class FixupLEAPass : public MachineFunctionPass {
void processInstructionForSLM(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI);
+
+  /// \brief Given a LEA instruction which is unprofitable
+  /// on SNB+, try to replace it with other instructions.
+ /// According to Intel's Optimization Reference Manual:
+ /// " For LEA instructions with three source operands and some specific
+ /// situations, instruction latency has increased to 3 cycles, and must
+ /// dispatch via port 1:
+ /// - LEA that has all three source operands: base, index, and offset
+ /// - LEA that uses base and index registers where the base is EBP, RBP,
+ /// or R13
+ /// - LEA that uses RIP relative addressing mode
+ /// - LEA that uses 16-bit addressing mode "
+ /// This function currently handles the first 2 cases only.
+ MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI,
+ MachineFunction::iterator MFI);
+
/// \brief Look for LEAs that add 1 to reg or subtract 1 from reg
/// and convert them to INC or DEC respectively.
bool fixupIncDec(MachineBasicBlock::iterator &I,
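To make the quoted guidance concrete, a sketch of the rewrite the new hook performs in the common case (operands chosen for illustration):

  lea 0x10(%rbx,%r13,1), %rcx   ; 3-op LEA: 3-cycle latency, port 1 only
  =>
  lea (%rbx,%r13,1), %rcx       ; 2-op LEA keeps the fast form
  add $0x10, %rcx               ; displacement moved into an ADD

The inefficient-base case (EBP/RBP/R13 as base) is handled separately below, by swapping base and index or by materializing the base with a register copy plus ADD.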
@@ -85,7 +107,13 @@ class FixupLEAPass : public MachineFunctionPass {
MachineBasicBlock::iterator &MBBI) const;
public:
- FixupLEAPass() : MachineFunctionPass(ID) {}
+ static char ID;
+
+ StringRef getPassName() const override { return FIXUPLEA_DESC; }
+
+ FixupLEAPass() : MachineFunctionPass(ID) {
+ initializeFixupLEAPassPass(*PassRegistry::getPassRegistry());
+ }
/// \brief Loop over all of the basic blocks,
/// replacing instructions by equivalent LEA instructions
@@ -104,9 +132,12 @@ private:
bool OptIncDec;
bool OptLEA;
};
-char FixupLEAPass::ID = 0;
}
+char FixupLEAPass::ID = 0;
+
+INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
+
MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI) const {
@@ -168,7 +199,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>();
OptIncDec = !ST.slowIncDec() || Func.getFunction()->optForMinSize();
- OptLEA = ST.LEAusesAG() || ST.slowLEA();
+ OptLEA = ST.LEAusesAG() || ST.slowLEA() || ST.slow3OpsLEA();
if (!OptLEA && !OptIncDec)
return false;
@@ -242,9 +273,64 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
return MachineBasicBlock::iterator();
}
-static inline bool isLEA(const int opcode) {
- return opcode == X86::LEA16r || opcode == X86::LEA32r ||
- opcode == X86::LEA64r || opcode == X86::LEA64_32r;
+static inline bool isLEA(const int Opcode) {
+ return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
+ Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
+}
+
+static inline bool isInefficientLEAReg(unsigned int Reg) {
+ return Reg == X86::EBP || Reg == X86::RBP || Reg == X86::R13;
+}
+
+static inline bool isRegOperand(const MachineOperand &Op) {
+ return Op.isReg() && Op.getReg() != X86::NoRegister;
+}
+
+/// hasInefficientLEABaseReg - Returns true for a LEA that uses base and
+/// index registers where the base is EBP, RBP, or R13.
+static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
+ const MachineOperand &Index) {
+ return Base.isReg() && isInefficientLEAReg(Base.getReg()) &&
+ isRegOperand(Index);
+}
+
+static inline bool hasLEAOffset(const MachineOperand &Offset) {
+ return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
+}
+
+// LEA instruction that has all three operands: offset, base and index
+static inline bool isThreeOperandsLEA(const MachineOperand &Base,
+ const MachineOperand &Index,
+ const MachineOperand &Offset) {
+ return isRegOperand(Base) && isRegOperand(Index) && hasLEAOffset(Offset);
+}
+
+static inline int getADDrrFromLEA(int LEAOpcode) {
+ switch (LEAOpcode) {
+ default:
+ llvm_unreachable("Unexpected LEA instruction");
+ case X86::LEA16r:
+ return X86::ADD16rr;
+ case X86::LEA32r:
+ return X86::ADD32rr;
+ case X86::LEA64_32r:
+ case X86::LEA64r:
+ return X86::ADD64rr;
+ }
+}
+
+static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) {
+ bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
+ switch (LEAOpcode) {
+ default:
+ llvm_unreachable("Unexpected LEA instruction");
+ case X86::LEA16r:
+ return IsInt8 ? X86::ADD16ri8 : X86::ADD16ri;
+ case X86::LEA32r:
+ case X86::LEA64_32r:
+ return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
+ case X86::LEA64r:
+ return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
+ }
}
 /// isLEASimpleIncOrDec - Does this LEA have one of these forms:
@@ -337,8 +423,8 @@ void FixupLEAPass::seekLEAFixup(MachineOperand &p,
void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
MachineInstr &MI = *I;
- const int opcode = MI.getOpcode();
- if (!isLEA(opcode))
+ const int Opcode = MI.getOpcode();
+ if (!isLEA(Opcode))
return;
if (MI.getOperand(5).getReg() != 0 || !MI.getOperand(4).isImm() ||
!TII->isSafeToClobberEFLAGS(*MFI, I))
@@ -350,53 +436,142 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
return;
if (MI.getOperand(2).getImm() > 1)
return;
- int addrr_opcode, addri_opcode;
- switch (opcode) {
- default:
- llvm_unreachable("Unexpected LEA instruction");
- case X86::LEA16r:
- addrr_opcode = X86::ADD16rr;
- addri_opcode = X86::ADD16ri;
- break;
- case X86::LEA32r:
- addrr_opcode = X86::ADD32rr;
- addri_opcode = X86::ADD32ri;
- break;
- case X86::LEA64_32r:
- case X86::LEA64r:
- addrr_opcode = X86::ADD64rr;
- addri_opcode = X86::ADD64ri32;
- break;
- }
DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
DEBUG(dbgs() << "FixLEA: Replaced by: ";);
MachineInstr *NewMI = nullptr;
- const MachineOperand &Dst = MI.getOperand(0);
// Make ADD instruction for two registers writing to LEA's destination
if (SrcR1 != 0 && SrcR2 != 0) {
- const MachineOperand &Src1 = MI.getOperand(SrcR1 == DstR ? 1 : 3);
- const MachineOperand &Src2 = MI.getOperand(SrcR1 == DstR ? 3 : 1);
- NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addrr_opcode))
- .add(Dst)
- .add(Src1)
- .add(Src2);
- MFI->insert(I, NewMI);
+ const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
+ const MachineOperand &Src = MI.getOperand(SrcR1 == DstR ? 3 : 1);
+ NewMI =
+ BuildMI(*MFI, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
DEBUG(NewMI->dump(););
}
// Make ADD instruction for immediate
if (MI.getOperand(4).getImm() != 0) {
+ const MCInstrDesc &ADDri =
+ TII->get(getADDriFromLEA(Opcode, MI.getOperand(4)));
const MachineOperand &SrcR = MI.getOperand(SrcR1 == DstR ? 1 : 3);
- NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addri_opcode))
- .add(Dst)
+ NewMI = BuildMI(*MFI, I, MI.getDebugLoc(), ADDri, DstR)
.add(SrcR)
.addImm(MI.getOperand(4).getImm());
- MFI->insert(I, NewMI);
DEBUG(NewMI->dump(););
}
if (NewMI) {
MFI->erase(I);
- I = static_cast<MachineBasicBlock::iterator>(NewMI);
+ I = NewMI;
+ }
+}
+
+MachineInstr *
+FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
+ MachineFunction::iterator MFI) {
+
+ const int LEAOpcode = MI.getOpcode();
+ if (!isLEA(LEAOpcode))
+ return nullptr;
+
+ const MachineOperand &Dst = MI.getOperand(0);
+ const MachineOperand &Base = MI.getOperand(1);
+ const MachineOperand &Scale = MI.getOperand(2);
+ const MachineOperand &Index = MI.getOperand(3);
+ const MachineOperand &Offset = MI.getOperand(4);
+ const MachineOperand &Segment = MI.getOperand(5);
+
+ if (!(isThreeOperandsLEA(Base, Index, Offset) ||
+ hasInefficientLEABaseReg(Base, Index)) ||
+ !TII->isSafeToClobberEFLAGS(*MFI, MI) ||
+ Segment.getReg() != X86::NoRegister)
+ return nullptr;
+
+ unsigned int DstR = Dst.getReg();
+ unsigned int BaseR = Base.getReg();
+ unsigned int IndexR = Index.getReg();
+ unsigned SSDstR =
+ (LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64) : DstR;
+ bool IsScale1 = Scale.getImm() == 1;
+ bool IsInefficientBase = isInefficientLEAReg(BaseR);
+ bool IsInefficientIndex = isInefficientLEAReg(IndexR);
+
+ // Skip these cases since it takes more than 2 instructions
+ // to replace the LEA instruction.
+ if (IsInefficientBase && SSDstR == BaseR && !IsScale1)
+ return nullptr;
+ if (LEAOpcode == X86::LEA64_32r && IsInefficientBase &&
+ (IsInefficientIndex || !IsScale1))
+ return nullptr;
+
+ const DebugLoc DL = MI.getDebugLoc();
+ const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode));
+ const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset));
+
+ DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
+ DEBUG(dbgs() << "FixLEA: Replaced by: ";);
+
+ // First try to replace LEA with one or two (for the 3-op LEA case)
+ // add instructions:
+  // 1. lea (%base,%index,1), %base  => add %index,%base
+  // 2. lea (%base,%index,1), %index => add %base,%index
+ if (IsScale1 && (DstR == BaseR || DstR == IndexR)) {
+ const MachineOperand &Src = DstR == BaseR ? Index : Base;
+ MachineInstr *NewMI =
+ BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Src);
+ DEBUG(NewMI->dump(););
+ // Create ADD instruction for the Offset in case of 3-Ops LEA.
+ if (hasLEAOffset(Offset)) {
+ NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
+ DEBUG(NewMI->dump(););
+ }
+ return NewMI;
+ }
+  // If the base is inefficient, try switching the index and base operands;
+  // otherwise just break the 3-op LEA into a 2-op LEA plus an ADD:
+ // lea offset(%base,%index,scale),%dst =>
+ // lea (%base,%index,scale); add offset,%dst
+ if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
+ MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode))
+ .add(Dst)
+ .add(IsInefficientBase ? Index : Base)
+ .add(Scale)
+ .add(IsInefficientBase ? Base : Index)
+ .addImm(0)
+ .add(Segment);
+ DEBUG(NewMI->dump(););
+ // Create ADD instruction for the Offset in case of 3-Ops LEA.
+ if (hasLEAOffset(Offset)) {
+ NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
+ DEBUG(NewMI->dump(););
+ }
+ return NewMI;
+ }
+  // Handle the remaining cases with an inefficient base register:
+ assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!");
+ assert(IsInefficientBase && "efficient base should be handled already!");
+
+ // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
+ if (IsScale1 && !hasLEAOffset(Offset)) {
+ TII->copyPhysReg(*MFI, MI, DL, DstR, BaseR, Base.isKill());
+ DEBUG(MI.getPrevNode()->dump(););
+
+ MachineInstr *NewMI =
+ BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Index);
+ DEBUG(NewMI->dump(););
+ return NewMI;
}
+ // lea offset(%base,%index,scale), %dst =>
+ // lea offset( ,%index,scale), %dst; add %base,%dst
+ MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode))
+ .add(Dst)
+ .addReg(0)
+ .add(Scale)
+ .add(Index)
+ .add(Offset)
+ .add(Segment);
+ DEBUG(NewMI->dump(););
+
+ NewMI = BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Base);
+ DEBUG(NewMI->dump(););
+ return NewMI;
}
bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
@@ -410,8 +585,16 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
if (OptLEA) {
if (MF.getSubtarget<X86Subtarget>().isSLM())
processInstructionForSLM(I, MFI);
- else
- processInstruction(I, MFI);
+      else {
+ if (MF.getSubtarget<X86Subtarget>().slow3OpsLEA()) {
+ if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) {
+ MFI->erase(I);
+ I = NewMI;
+ }
+ } else
+ processInstruction(I, MFI);
+ }
}
}
return false;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 11c08292518a..37b248416e4a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1140,7 +1140,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
- addRegisterClass(MVT::i1, &X86::VK1RegClass);
+ addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
@@ -1155,16 +1155,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
}
- setOperationAction(ISD::BR_CC, MVT::i1, Expand);
- setOperationAction(ISD::SETCC, MVT::i1, Custom);
- setOperationAction(ISD::SETCCE, MVT::i1, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
- setOperationAction(ISD::XOR, MVT::i1, Legal);
- setOperationAction(ISD::OR, MVT::i1, Legal);
- setOperationAction(ISD::AND, MVT::i1, Legal);
- setOperationAction(ISD::SUB, MVT::i1, Custom);
- setOperationAction(ISD::ADD, MVT::i1, Custom);
- setOperationAction(ISD::MUL, MVT::i1, Custom);
for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16,
MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32,
@@ -1233,7 +1223,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MSTORE, VT, Custom);
}
}
- setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
@@ -1311,7 +1300,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
@@ -1699,7 +1690,7 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext& Context,
EVT VT) const {
if (!VT.isVector())
- return Subtarget.hasAVX512() ? MVT::i1: MVT::i8;
+ return MVT::i8;
if (VT.isSimple()) {
MVT VVT = VT.getSimpleVT();
@@ -2480,6 +2471,9 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
SelectionDAG &DAG) {
SDValue ValReturned = ValArg;
+ if (ValVT == MVT::v1i1)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
+
if (ValVT == MVT::v64i1) {
     // On a 32-bit machine, this case is handled by getv64i1Argument
assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
@@ -2502,7 +2496,6 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
}
-
return DAG.getBitcast(ValVT, ValReturned);
}
@@ -2809,8 +2802,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
SDValue Val = DAG.getLoad(
ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)
- : Val;
+ return ExtendedInMem
+ ? (VA.getValVT().isVector()
+ ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
+ : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
+ : Val;
}
// FIXME: Get this from tablegen.
@@ -2960,7 +2956,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
else if (RegVT == MVT::x86mmx)
RC = &X86::VR64RegClass;
- else if (RegVT == MVT::i1)
+ else if (RegVT == MVT::v1i1)
RC = &X86::VK1RegClass;
else if (RegVT == MVT::v8i1)
RC = &X86::VK8RegClass;
@@ -6871,7 +6867,7 @@ static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (!In.isUndef())
- Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
+ Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
}
SDLoc dl(Op);
MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8));
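The new '& 0x1' matters when a boolean element arrives with upper bits set (e.g. a 'true' represented as 0xFF after promotion — an assumption for illustration). A worked example at idx = 2:

  old: Immediate |= 0xFF << 2;       // 0x3FC: bits 2..9 set, mask corrupted
  new: Immediate |= (0xFF & 1) << 2; // 0x004: only bit 2 set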
@@ -6914,7 +6910,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(In))
NonConstIdx.push_back(idx);
else {
- Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
+ Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
HasConstElts = true;
}
if (SplatIdx < 0)
@@ -13946,7 +13942,6 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const
SDValue Idx = Op.getOperand(1);
MVT EltVT = Op.getSimpleValueType();
- assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector");
assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) &&
"Unexpected vector type in ExtractBitFromMaskVector");
@@ -13980,8 +13975,8 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(MaxSift, dl, MVT::i8));
- return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec,
- DAG.getIntPtrConstant(0, dl));
+ return DAG.getNode(X86ISD::VEXTRACT, dl, Op.getSimpleValueType(), Vec,
+ DAG.getIntPtrConstant(0, dl));
}
SDValue
@@ -13992,7 +13987,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
- if (Op.getSimpleValueType() == MVT::i1)
+ if (VecVT.getVectorElementType() == MVT::i1)
return ExtractBitFromMaskVector(Op, DAG);
if (!isa<ConstantSDNode>(Idx)) {
@@ -14163,10 +14158,13 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
return EltInVec;
}
- // Insertion of one bit into first or last position
- // can be done with two SHIFTs + OR.
+ // Insertion of one bit into first position
if (IdxVal == 0 ) {
- // EltInVec already at correct index and other bits are 0.
+    // Clear the undefined upper bits of the element vector.
+ EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
+ DAG.getConstant(NumElems - 1, dl, MVT::i8));
+ EltInVec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, EltInVec,
+ DAG.getConstant(NumElems - 1, dl, MVT::i8));
     // Clear the first bit in the source vector.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1 , dl, MVT::i8));
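A worked v8i1 illustration of the rewritten IdxVal == 0 path (bit values for illustration; this KSHIFTR is presumably paired with a matching KSHIFTL just past this hunk, before the OR):

  EltInVec = 0bxxxxxxxE             ; only bit 0 is meaningful
  kshiftl 7 -> 0bE0000000, kshiftr 7 -> 0b0000000E  ; upper bits now zero
  Vec = 0bVVVVVVVW; kshiftr 1 then kshiftl 1 -> 0bVVVVVVV0
  OR -> 0bVVVVVVVE                  ; E inserted without touching bits 1..7

The added shift pair on EltInVec is the fix: the patch stops assuming the scalar's upper bits are already zero.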
@@ -14175,6 +14173,7 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
+ // Insertion of one bit into last position
if (IdxVal == NumElems -1) {
// Move the bit to the last position inside the vector.
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
@@ -17322,8 +17321,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
- assert(((!Subtarget.hasAVX512() && VT == MVT::i8) || (VT == MVT::i1))
- && "SetCC type must be 8-bit or 1-bit integer");
+ assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDLoc dl(Op);
@@ -17457,7 +17455,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (SSECC != 8) {
if (Subtarget.hasAVX512()) {
- SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::i1, CondOp0,
+ SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0,
CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
return DAG.getNode(VT.isVector() ? X86ISD::SELECT : X86ISD::SELECTS,
DL, VT, Cmp, Op1, Op2);
@@ -17505,9 +17503,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
// AVX512 fallback is to lower selects of scalar floats to masked moves.
- if (Cond.getValueType() == MVT::i1 && (VT == MVT::f64 || VT == MVT::f32) &&
- Subtarget.hasAVX512())
- return DAG.getNode(X86ISD::SELECTS, DL, VT, Cond, Op1, Op2);
+ if ((VT == MVT::f64 || VT == MVT::f32) && Subtarget.hasAVX512()) {
+ SDValue Cmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Cond);
+ return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
+ }
if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
SDValue Op1Scalar;
@@ -19048,8 +19047,8 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
/// \brief Creates an SDNode for a predicated scalar operation.
/// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc).
-/// The mask is coming as MVT::i8 and it should be truncated
-/// to MVT::i1 while lowering masking intrinsics.
+/// The mask comes in as MVT::i8 and should be transformed
+/// to MVT::v1i1 while lowering masking intrinsics.
/// The main difference between ScalarMaskingNode and VectorMaskingNode is using
/// "X86select" instead of "vselect". We just can't create the "vselect" node
/// for a scalar instruction.
@@ -19064,11 +19063,10 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
- // The mask should be of type MVT::i1
- SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
+ SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask);
if (Op.getOpcode() == X86ISD::FSETCCM ||
- Op.getOpcode() == X86ISD::FSETCCM_RND)
+ Op.getOpcode() == X86ISD::FSETCCM_RND)
return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
if (Op.getOpcode() == X86ISD::VFPCLASSS)
return DAG.getNode(ISD::OR, dl, VT, Op, IMask);
@@ -19507,10 +19505,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
SDValue Src1 = Op.getOperand(1);
SDValue Imm = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
- SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Imm);
+ SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm);
SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask,
DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG);
- return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, FPclassMask);
+ return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i8, FPclassMask,
+ DAG.getIntPtrConstant(0, dl));
}
case CMP_MASK:
case CMP_MASK_CC: {
@@ -19570,18 +19569,18 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
if (IntrData->Opc1 != 0) {
SDValue Rnd = Op.getOperand(5);
if (!isRoundModeCurDirection(Rnd))
- Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd);
+ Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Rnd);
}
     // Default rounding mode.
if(!Cmp.getNode())
- Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Src2, CC);
+ Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC);
SDValue CmpMask = getScalarMaskingNode(Cmp, Mask,
DAG.getTargetConstant(0, dl,
MVT::i1),
Subtarget, DAG);
-
- return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, CmpMask);
+ return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i8, CmpMask,
+ DAG.getIntPtrConstant(0, dl));
}
case COMI: { // Comparison intrinsics
ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
@@ -19629,13 +19628,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
SDValue FCmp;
if (isRoundModeCurDirection(Sae))
- FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::i1, LHS, RHS,
- DAG.getConstant(CondVal, dl, MVT::i8));
+ FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS,
+ DAG.getConstant(CondVal, dl, MVT::i8));
else
- FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::i1, LHS, RHS,
- DAG.getConstant(CondVal, dl, MVT::i8), Sae);
- // AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg"
- return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp);
+ FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::v1i1, LHS, RHS,
+ DAG.getConstant(CondVal, dl, MVT::i8), Sae);
+ return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i32, FCmp,
+ DAG.getIntPtrConstant(0, dl));
}
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
@@ -23385,8 +23384,6 @@ static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 &&
"Unexpected request for vector widening");
- EVT EltVT = NVT.getVectorElementType();
-
SDLoc dl(InOp);
if (InOp.getOpcode() == ISD::CONCAT_VECTORS &&
InOp.getNumOperands() == 2) {
@@ -23404,6 +23401,8 @@ static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
for (unsigned i = 0; i < InNumElts; ++i)
Ops.push_back(InOp.getOperand(i));
+ EVT EltVT = InOp.getOperand(0).getValueType();
+
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
DAG.getUNDEF(EltVT);
for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i)
@@ -24709,16 +24708,22 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
// xbegin sinkMBB
//
// mainMBB:
- // eax = -1
+ // s0 = -1
+ //
+  // fallMBB:
+  //  eax = # XABORT_DEF
+  //  s1 = eax
//
// sinkMBB:
- // v = eax
+  //  v = phi(s0/mainMBB, s1/fallMBB)
MachineBasicBlock *thisMBB = MBB;
MachineFunction *MF = MBB->getParent();
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
+ MF->insert(I, fallMBB);
MF->insert(I, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
@@ -24726,25 +24731,40 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned fallDstReg = MRI.createVirtualRegister(RC);
+
// thisMBB:
- // xbegin sinkMBB
+ // xbegin fallMBB
// # fallthrough to mainMBB
- // # abortion to sinkMBB
- BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(sinkMBB);
+  // # abort path to fallMBB
+ BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(fallMBB);
thisMBB->addSuccessor(mainMBB);
- thisMBB->addSuccessor(sinkMBB);
+ thisMBB->addSuccessor(fallMBB);
// mainMBB:
- // EAX = -1
- BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), X86::EAX).addImm(-1);
+ // mainDstReg := -1
+ BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);
+ BuildMI(mainMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
mainMBB->addSuccessor(sinkMBB);
- // sinkMBB:
- // EAX is live into the sinkMBB
- sinkMBB->addLiveIn(X86::EAX);
- BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(TargetOpcode::COPY),
- MI.getOperand(0).getReg())
+ // fallMBB:
+  //  ; pseudo instruction modeling the hardware's definition of EAX by XABORT
+ // EAX := XABORT_DEF
+ // fallDstReg := EAX
+ BuildMI(fallMBB, DL, TII->get(X86::XABORT_DEF));
+ BuildMI(fallMBB, DL, TII->get(TargetOpcode::COPY), fallDstReg)
.addReg(X86::EAX);
+ fallMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+  //  DstReg := phi(mainDstReg/mainMBB, fallDstReg/fallMBB)
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(fallDstReg).addMBB(fallMBB);
MI.eraseFromParent();
return sinkMBB;
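A C-level sketch of the contract the new expansion models (names illustrative):

  int xbegin_like(void) {
    int s0 = -1;  // mainMBB: fallthrough, transaction is running
    // On abort, control resumes in fallMBB with the abort status in EAX;
    // the XABORT_DEF pseudo makes that implicit hardware def visible.
    // sinkMBB: v = phi(s0 from mainMBB, s1 from fallMBB)
    return s0;
  }

Compared with the old expansion, -1 now lives in a virtual register and the abort-path definition of EAX is modeled explicitly, presumably so liveness no longer depends on EAX being live into sinkMBB.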
@@ -29574,7 +29594,7 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse() &&
CondVT.getVectorElementType() == MVT::i1) {
// Invert the cond to not(cond) : xor(op,allones)=not(op)
- SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ SDValue CondNew = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
DAG.getAllOnesConstant(DL, CondVT));
// Vselect cond, op1, op2 = Vselect not(cond), op2, op1
return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
@@ -31321,13 +31341,11 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
if (Subtarget.hasAVX512()) {
- SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::i1, CMP00,
- CMP01,
- DAG.getConstant(x86cc, DL, MVT::i8));
- if (N->getValueType(0) != MVT::i1)
- return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0),
- FSetCC);
- return FSetCC;
+ SDValue FSetCC =
+ DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
+ DAG.getConstant(x86cc, DL, MVT::i8));
+ return DAG.getNode(X86ISD::VEXTRACT, DL, N->getSimpleValueType(0),
+ FSetCC, DAG.getIntPtrConstant(0, DL));
}
SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL,
CMP00.getValueType(), CMP00, CMP01,
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 71d395244b4a..f9344413bbcf 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -31,8 +31,7 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
// The mask VT.
- ValueType KVT = !cast<ValueType>(!if (!eq (NumElts, 1), "i1",
- "v" # NumElts # "i1"));
+ ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
// Suffix used in the instruction mnemonic.
string Suffix = suffix;
@@ -2263,7 +2262,7 @@ let Predicates = [HasAVX512, NoDQI] in {
let Predicates = [HasAVX512] in {
def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
(KMOVWmk addr:$dst, VK16:$src)>;
- def : Pat<(i1 (load addr:$src)),
+ def : Pat<(v1i1 (load addr:$src)),
(COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
(KMOVWkm addr:$src)>;
@@ -2280,77 +2279,45 @@ let Predicates = [HasBWI] in {
}
let Predicates = [HasAVX512] in {
- def : Pat<(i1 (trunc (i64 GR64:$src))),
- (COPY_TO_REGCLASS (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
- (i32 1)), VK1)>;
+ multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
+ def : Pat<(maskVT (scalar_to_vector GR32:$src)),
+ (COPY_TO_REGCLASS GR32:$src, maskRC)>;
- def : Pat<(i1 (trunc (i32 GR32:$src))),
- (COPY_TO_REGCLASS (AND32ri8 $src, (i32 1)), VK1)>;
+ def : Pat<(i32 (X86Vextract maskRC:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS maskRC:$src, GR32)>;
- def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))),
- (COPY_TO_REGCLASS GR32:$src, VK1)>;
+ def : Pat<(maskVT (scalar_to_vector GR8:$src)),
+ (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
- def : Pat<(i1 (trunc (i8 GR8:$src))),
- (COPY_TO_REGCLASS
- (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR8:$src, sub_8bit), (i32 1)), VK1)>;
+ def : Pat<(i8 (X86Vextract maskRC:$src, (iPTR 0))),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
- def : Pat<(i1 (trunc (i16 GR16:$src))),
- (COPY_TO_REGCLASS
- (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR16:$src, sub_16bit), (i32 1)), VK1)>;
-
- def : Pat<(i32 (zext VK1:$src)),
- (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1))>;
-
- def : Pat<(i32 (anyext VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, GR32)>;
-
- def : Pat<(i8 (zext VK1:$src)),
- (EXTRACT_SUBREG
- (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_8bit)>;
-
- def : Pat<(i8 (anyext VK1:$src)),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)>;
+ def : Pat<(i32 (anyext (i8 (X86Vextract maskRC:$src, (iPTR 0))))),
+ (COPY_TO_REGCLASS maskRC:$src, GR32)>;
+ }
- def : Pat<(i64 (zext VK1:$src)),
- (SUBREG_TO_REG (i64 0),
- (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_32bit)>;
+ defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
+ defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
+ defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
+ defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
+ defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
+ defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
+ defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
- def : Pat<(i64 (anyext VK1:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_32bit)>;
+  def : Pat<(X86kshiftr (X86kshiftl (v1i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)),
+ (COPY_TO_REGCLASS
+ (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src, sub_8bit), (i32 1))), VK1)>;
+  def : Pat<(X86kshiftr (X86kshiftl (v16i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)),
+ (COPY_TO_REGCLASS
+ (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src, sub_8bit), (i32 1))), VK16)>;
+  def : Pat<(X86kshiftr (X86kshiftl (v8i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)),
+ (COPY_TO_REGCLASS
+ (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src, sub_8bit), (i32 1))), VK8)>;
- def : Pat<(i16 (zext VK1:$src)),
- (EXTRACT_SUBREG
- (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_16bit)>;
-
- def : Pat<(i16 (anyext VK1:$src)),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_16bit)>;
-}
-def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK16)>;
-def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK8)>;
-def : Pat<(v4i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK4)>;
-def : Pat<(v2i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK2)>;
-def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK32)>;
-def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK64)>;
-
-def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
-def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
-def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
-
-def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))), (COPY_TO_REGCLASS VK64:$src, VK1)>;
-def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))), (COPY_TO_REGCLASS VK32:$src, VK1)>;
-def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))), (COPY_TO_REGCLASS VK16:$src, VK1)>;
-def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))), (COPY_TO_REGCLASS VK8:$src, VK1)>;
-def : Pat<(i1 (X86Vextract VK4:$src, (iPTR 0))), (COPY_TO_REGCLASS VK4:$src, VK1)>;
-def : Pat<(i1 (X86Vextract VK2:$src, (iPTR 0))), (COPY_TO_REGCLASS VK2:$src, VK1)>;
+}
// Mask unary operation
// - KNOT
@@ -2551,14 +2518,11 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
+ def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
- let AddedComplexity = 10 in { // To optimize isel table.
- def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
- def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
- def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
- }
+ def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
@@ -2570,6 +2534,12 @@ multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subV
def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
(VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
+defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
+defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
+defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
+defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
+defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
+defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
@@ -3249,7 +3219,7 @@ multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
def : Pat<(_.VT (OpNode _.RC:$src0,
(_.VT (scalar_to_vector
- (_.EltVT (X86selects (i1 (trunc GR32:$mask)),
+ (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
(_.EltVT _.FRC:$src1),
(_.EltVT _.FRC:$src2))))))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrk)
@@ -3260,7 +3230,7 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
def : Pat<(_.VT (OpNode _.RC:$src0,
(_.VT (scalar_to_vector
- (_.EltVT (X86selects (i1 (trunc GR32:$mask)),
+ (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
(_.EltVT _.FRC:$src1),
(_.EltVT ZeroFP))))))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrkz)
@@ -3279,7 +3249,7 @@ def : Pat<(masked_store addr:$dst, Mask,
(iPTR 0))),
(iPTR 0)))),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
- (i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)),
+ (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
@@ -3296,7 +3266,7 @@ def : Pat<(masked_store addr:$dst, Mask,
(iPTR 0))),
(iPTR 0)))),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
- (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
@@ -3310,7 +3280,7 @@ def : Pat<(_.info128.VT (extract_subvector
(v16i32 immAllZerosV))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
- (i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)),
+ (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
addr:$srcAddr)>;
def : Pat<(_.info128.VT (extract_subvector
@@ -3322,7 +3292,7 @@ def : Pat<(_.info128.VT (extract_subvector
(iPTR 0))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
- (i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)),
+ (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
addr:$srcAddr)>;
}
@@ -3338,7 +3308,7 @@ def : Pat<(_.info128.VT (extract_subvector
(v16i32 immAllZerosV))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
- (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
def : Pat<(_.info128.VT (extract_subvector
@@ -3350,7 +3320,7 @@ def : Pat<(_.info128.VT (extract_subvector
(iPTR 0))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
- (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
}
@@ -3381,7 +3351,7 @@ def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
- (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM)),
+ (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
(COPY_TO_REGCLASS VR128X:$src, FR32X))>;
let hasSideEffects = 0 in
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 9867ba84bb9b..e2e228f5544b 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -274,7 +274,7 @@ def X86select : SDNode<"X86ISD::SELECT",
SDTCisSameNumEltsAs<0, 1>]>>;
def X86selects : SDNode<"X86ISD::SELECTS",
- SDTypeProfile<1, 3, [SDTCisVT<1, i1>,
+ SDTypeProfile<1, 3, [SDTCisVT<1, v1i1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<2, 3>]>>;
@@ -441,7 +441,7 @@ def X86Vfpclass : SDNode<"X86ISD::VFPCLASS",
SDTCisSameNumEltsAs<0,1>,
SDTCisVT<2, i32>]>, []>;
def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS",
- SDTypeProfile<1, 2, [SDTCisVT<0, i1>,
+ SDTypeProfile<1, 2, [SDTCisVT<0, v1i1>,
SDTCisFP<1>, SDTCisVT<2, i32>]>,[]>;
def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
@@ -451,7 +451,7 @@ def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2,
- [SDTCisEltOfVec<0, 1>, SDTCisVec<1>,
+ [SDTCisVec<1>,
SDTCisPtrTy<2>]>, []>;
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 092ceb207ada..f7083a7448ce 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -10511,9 +10511,7 @@ void X86InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
void X86InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
MachineFunction &MF,
- bool IsTailCall) const {
- return;
-}
+ bool IsTailCall) const {}
MachineBasicBlock::iterator
X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 4d7d8ece92d9..01df07e1715f 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -896,9 +896,16 @@ def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
"TM.getCodeModel() == CodeModel::Kernel">;
def IsNotPIC : Predicate<"!TM.isPositionIndependent()">;
-def OptForSize : Predicate<"Subtarget->getOptForSize()">;
-def OptForMinSize : Predicate<"Subtarget->getOptForMinSize()">;
-def OptForSpeed : Predicate<"!Subtarget->getOptForSize()">;
+
+// We could compute these on a per-module basis but doing so requires accessing
+// the Function object through the <Target>Subtarget and objections were raised
+// to that (see post-commit review comments for r301750).
+let RecomputePerFunction = 1 in {
+ def OptForSize : Predicate<"MF->getFunction()->optForSize()">;
+ def OptForMinSize : Predicate<"MF->getFunction()->optForMinSize()">;
+ def OptForSpeed : Predicate<"!MF->getFunction()->optForSize()">;
+}
+
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">;
def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index 38ac8be94483..61aac58a491f 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -30,6 +30,11 @@ def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget32:$dst),
"xbegin\t$dst", []>, OpSize32;
}
+// Pseudo instruction to fake the definition of EAX on the fallback code path.
+let isPseudo = 1, Defs = [EAX] in {
+def XABORT_DEF : I<0, Pseudo, (outs), (ins), "# XABORT DEF", []>;
+}
+
def XEND : I<0x01, MRM_D5, (outs), (ins),
"xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index 5eb5ad52840a..61956f741820 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -449,24 +449,30 @@ bool X86InstructionSelector::selectTrunc(MachineInstr &I,
if (!SrcRC)
return false;
- if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
- !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
- DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
- return false;
- }
-
+ unsigned SubIdx;
if (DstRC == SrcRC) {
// Nothing to be done
+ SubIdx = X86::NoSubRegister;
} else if (DstRC == &X86::GR32RegClass) {
- I.getOperand(1).setSubReg(X86::sub_32bit);
+ SubIdx = X86::sub_32bit;
} else if (DstRC == &X86::GR16RegClass) {
- I.getOperand(1).setSubReg(X86::sub_16bit);
+ SubIdx = X86::sub_16bit;
} else if (DstRC == &X86::GR8RegClass) {
- I.getOperand(1).setSubReg(X86::sub_8bit);
+ SubIdx = X86::sub_8bit;
} else {
return false;
}
+ SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);
+
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
+ return false;
+ }
+
+ I.getOperand(1).setSubReg(SubIdx);
+
I.setDesc(TII.get(X86::COPY));
return true;
}
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index 8ce240714f17..da724f5d8989 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -184,6 +184,7 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() {
return;
const LLT s64 = LLT::scalar(64);
+ const LLT v16s8 = LLT::vector(16, 8);
const LLT v8s16 = LLT::vector(8, 16);
const LLT v4s32 = LLT::vector(4, 32);
const LLT v2s64 = LLT::vector(2, 64);
@@ -193,7 +194,7 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() {
setAction({BinOp, Ty}, Legal);
for (unsigned BinOp : {G_ADD, G_SUB})
- for (auto Ty : {v4s32})
+ for (auto Ty : {v16s8, v8s16, v4s32, v2s64})
setAction({BinOp, Ty}, Legal);
setAction({G_MUL, v8s16}, Legal);
@@ -212,8 +213,14 @@ void X86LegalizerInfo::setLegalizerInfoAVX2() {
if (!Subtarget.hasAVX2())
return;
+ const LLT v32s8 = LLT::vector(32, 8);
const LLT v16s16 = LLT::vector(16, 16);
const LLT v8s32 = LLT::vector(8, 32);
+ const LLT v4s64 = LLT::vector(4, 64);
+
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {v32s8, v16s16, v8s32, v4s64})
+ setAction({BinOp, Ty}, Legal);
for (auto Ty : {v16s16, v8s32})
setAction({G_MUL, Ty}, Legal);
@@ -224,6 +231,11 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() {
return;
const LLT v16s32 = LLT::vector(16, 32);
+ const LLT v8s64 = LLT::vector(8, 64);
+
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {v16s32, v8s64})
+ setAction({BinOp, Ty}, Legal);
setAction({G_MUL, v16s32}, Legal);
@@ -261,8 +273,13 @@ void X86LegalizerInfo::setLegalizerInfoAVX512BW() {
if (!(Subtarget.hasAVX512() && Subtarget.hasBWI()))
return;
+ const LLT v64s8 = LLT::vector(64, 8);
const LLT v32s16 = LLT::vector(32, 16);
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {v64s8, v32s16})
+ setAction({BinOp, Ty}, Legal);
+
setAction({G_MUL, v32s16}, Legal);
/************ VLX *******************/
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index d235d2b40b15..3a61a7247c72 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -511,7 +511,7 @@ def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 31)>;
// Mask registers
-def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
+def VK1 : RegisterClass<"X86", [v1i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
@@ -519,7 +519,7 @@ def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
-def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;}
+def VK1WM : RegisterClass<"X86", [v1i1], 16, (sub VK1, K0)> {let Size = 16;}
def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index d66d39dcee17..2b1f43bffd71 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -320,6 +320,7 @@ void X86Subtarget::initializeEnvironment() {
CallRegIndirect = false;
LEAUsesAG = false;
SlowLEA = false;
+ Slow3OpsLEA = false;
SlowIncDec = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
@@ -336,8 +337,7 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
const X86TargetMachine &TM,
- unsigned StackAlignOverride, bool OptForSize,
- bool OptForMinSize)
+ unsigned StackAlignOverride)
: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
StackAlignOverride(StackAlignOverride),
@@ -347,8 +347,7 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
In16BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() == Triple::CODE16),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering(*this, getStackAlignment()), OptForSize(OptForSize),
- OptForMinSize(OptForMinSize) {
+ FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
if (!isPositionIndependent())
setPICStyle(PICStyles::None);
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index de1514243aeb..a9f3a2aee1be 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -253,6 +253,11 @@ protected:
/// True if the LEA instruction with certain arguments is slow
bool SlowLEA;
+ /// True if the LEA instruction has all three source operands: base, index,
+ /// and offset, or if the LEA instruction uses the base and index registers
+ /// where the base is EBP, RBP, or R13
+ bool Slow3OpsLEA;
+
/// True if INC and DEC instructions are slow when writing to flags
bool SlowIncDec;
@@ -331,16 +336,12 @@ private:
X86TargetLowering TLInfo;
X86FrameLowering FrameLowering;
- bool OptForSize;
- bool OptForMinSize;
-
public:
/// This constructor initializes the data members to match those
/// of the specified triple.
///
X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
- const X86TargetMachine &TM, unsigned StackAlignOverride,
- bool OptForSize, bool OptForMinSize);
+ const X86TargetMachine &TM, unsigned StackAlignOverride);
/// This object will take ownership of \p GISelAccessor.
void setGISelAccessor(GISelAccessor &GISel) { this->GISel.reset(&GISel); }
@@ -490,6 +491,7 @@ public:
bool callRegIndirect() const { return CallRegIndirect; }
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
+ bool slow3OpsLEA() const { return Slow3OpsLEA; }
bool slowIncDec() const { return SlowIncDec; }
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
@@ -507,9 +509,6 @@ public:
bool isSLM() const { return X86ProcFamily == IntelSLM; }
bool useSoftFloat() const { return UseSoftFloat; }
- bool getOptForSize() const { return OptForSize; }
- bool getOptForMinSize() const { return OptForMinSize; }
-
/// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
/// no-sse2). There isn't any reason to disable it if the target processor
/// supports it.
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 9a82e6e50463..53a8e83b36fc 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -61,6 +61,7 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
namespace llvm {
void initializeWinEHStatePassPass(PassRegistry &);
+void initializeFixupLEAPassPass(PassRegistry &);
void initializeX86ExecutionDepsFixPass(PassRegistry &);
} // end namespace llvm
@@ -75,6 +76,7 @@ extern "C" void LLVMInitializeX86Target() {
initializeWinEHStatePassPass(PR);
initializeFixupBWInstPassPass(PR);
initializeEvexToVexInstPassPass(PR);
+ initializeFixupLEAPassPass(PR);
initializeX86ExecutionDepsFixPass(PR);
}
@@ -268,12 +270,6 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
FS = Key.substr(CPU.size());
- bool OptForSize = F.optForSize();
- bool OptForMinSize = F.optForMinSize();
-
- Key += std::string(OptForSize ? "+" : "-") + "optforsize";
- Key += std::string(OptForMinSize ? "+" : "-") + "optforminsize";
-
auto &I = SubtargetMap[Key];
if (!I) {
// This needs to be done before we create a new subtarget since any
@@ -281,8 +277,7 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this,
- Options.StackAlignmentOverride,
- OptForSize, OptForMinSize);
+ Options.StackAlignmentOverride);
#ifndef LLVM_BUILD_GLOBAL_ISEL
GISelAccessor *GISel = new GISelAccessor();
#else
@@ -378,12 +373,12 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
}
void X86PassConfig::addIRPasses() {
- addPass(createAtomicExpandPass(&getX86TargetMachine()));
+ addPass(createAtomicExpandPass());
TargetPassConfig::addIRPasses();
if (TM->getOptLevel() != CodeGenOpt::None)
- addPass(createInterleavedAccessPass(TM));
+ addPass(createInterleavedAccessPass());
}
bool X86PassConfig::addInstSelector() {
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 8566bd91c89e..fe94079fd869 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1392,15 +1392,47 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
// CTLZ: llvm\test\CodeGen\X86\vector-lzcnt-*.ll
// CTPOP: llvm\test\CodeGen\X86\vector-popcnt-*.ll
// CTTZ: llvm\test\CodeGen\X86\vector-tzcnt-*.ll
+ static const CostTblEntry AVX512CDCostTbl[] = {
+ { ISD::CTLZ, MVT::v8i64, 1 },
+ { ISD::CTLZ, MVT::v16i32, 1 },
+ { ISD::CTLZ, MVT::v32i16, 8 },
+ { ISD::CTLZ, MVT::v64i8, 20 },
+ { ISD::CTLZ, MVT::v4i64, 1 },
+ { ISD::CTLZ, MVT::v8i32, 1 },
+ { ISD::CTLZ, MVT::v16i16, 4 },
+ { ISD::CTLZ, MVT::v32i8, 10 },
+ { ISD::CTLZ, MVT::v2i64, 1 },
+ { ISD::CTLZ, MVT::v4i32, 1 },
+ { ISD::CTLZ, MVT::v8i16, 4 },
+ { ISD::CTLZ, MVT::v16i8, 4 },
+ };
static const CostTblEntry AVX512BWCostTbl[] = {
{ ISD::BITREVERSE, MVT::v8i64, 5 },
{ ISD::BITREVERSE, MVT::v16i32, 5 },
{ ISD::BITREVERSE, MVT::v32i16, 5 },
{ ISD::BITREVERSE, MVT::v64i8, 5 },
+ { ISD::CTLZ, MVT::v8i64, 23 },
+ { ISD::CTLZ, MVT::v16i32, 22 },
+ { ISD::CTLZ, MVT::v32i16, 18 },
+ { ISD::CTLZ, MVT::v64i8, 17 },
+ { ISD::CTPOP, MVT::v8i64, 7 },
+ { ISD::CTPOP, MVT::v16i32, 11 },
+ { ISD::CTPOP, MVT::v32i16, 9 },
+ { ISD::CTPOP, MVT::v64i8, 6 },
+ { ISD::CTTZ, MVT::v8i64, 10 },
+ { ISD::CTTZ, MVT::v16i32, 14 },
+ { ISD::CTTZ, MVT::v32i16, 12 },
+ { ISD::CTTZ, MVT::v64i8, 9 },
};
static const CostTblEntry AVX512CostTbl[] = {
{ ISD::BITREVERSE, MVT::v8i64, 36 },
{ ISD::BITREVERSE, MVT::v16i32, 24 },
+ { ISD::CTLZ, MVT::v8i64, 29 },
+ { ISD::CTLZ, MVT::v16i32, 35 },
+ { ISD::CTPOP, MVT::v8i64, 16 },
+ { ISD::CTPOP, MVT::v16i32, 24 },
+ { ISD::CTTZ, MVT::v8i64, 20 },
+ { ISD::CTTZ, MVT::v16i32, 28 },
};
static const CostTblEntry XOPCostTbl[] = {
{ ISD::BITREVERSE, MVT::v4i64, 4 },
@@ -1560,6 +1592,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
MVT MTy = LT.second;
// Attempt to look up the cost.
+ if (ST->hasCDI())
+ if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
if (ST->hasBWI())
if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
return LT.first * Entry->Cost;
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index e28e05c7f6a8..2950e2efbea3 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -74,7 +74,7 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
}
void XCorePassConfig::addIRPasses() {
- addPass(createAtomicExpandPass(&getXCoreTargetMachine()));
+ addPass(createAtomicExpandPass());
TargetPassConfig::addIRPasses();
}
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 203594572618..ec06d5f9fb05 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -44,8 +44,12 @@
using namespace llvm;
static cl::opt<bool>
-RunLoopVectorization("vectorize-loops", cl::Hidden,
- cl::desc("Run the Loop vectorization passes"));
+ RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore, cl::desc("Run Partial inlining pass"));
+
+static cl::opt<bool>
+ RunLoopVectorization("vectorize-loops", cl::Hidden,
+ cl::desc("Run the Loop vectorization passes"));
static cl::opt<bool>
RunSLPVectorization("vectorize-slp", cl::Hidden,
@@ -516,6 +520,8 @@ void PassManagerBuilder::populateModulePassManager(
// pass manager that we are specifically trying to avoid. To prevent this
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (RunPartialInlining)
+ MPM.add(createPartialInliningPass());
if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO &&
!PrepareForThinLTO)
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 0ca62b7ae40c..733eeb1767a3 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -852,8 +852,9 @@ Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) {
/// This basically requires proving that the sub in the original type would not
/// overflow to change the sign bit or have a carry out.
/// TODO: Handle this for Vectors.
-bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
- Instruction &CxtI) {
+bool InstCombiner::willNotOverflowSignedSub(const Value *LHS,
+ const Value *RHS,
+ const Instruction &CxtI) const {
// If LHS and RHS each have at least two sign bits, the subtraction
// cannot overflow.
if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
@@ -869,8 +870,8 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
// Subtraction of two 2's complement numbers having identical signs will
// never overflow.
- if ((LHSKnown.One[BitWidth - 1] && RHSKnown.One[BitWidth - 1]) ||
- (LHSKnown.Zero[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1]))
+ if ((LHSKnown.isNegative() && RHSKnown.isNegative()) ||
+ (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()))
return true;
// TODO: implement logic similar to checkRippleForAdd
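A quick sanity check of the same-sign rule, sketched as standalone C++ (illustrative only, not part of the patch): subtracting two i8 values whose sign bits match can never leave the representable range.

    #include <cassert>

    int main() {
      // Exhaustive i8 check: same-sign operands never wrap on subtraction.
      for (int L = -128; L <= 127; ++L)
        for (int R = -128; R <= 127; ++R)
          if ((L < 0) == (R < 0))
            assert(L - R >= -128 && L - R <= 127);
      return 0;
    }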
@@ -879,8 +880,9 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
/// \brief Return true if we can prove that:
/// (sub LHS, RHS) === (sub nuw LHS, RHS)
-bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
- Instruction &CxtI) {
+bool InstCombiner::willNotOverflowUnsignedSub(const Value *LHS,
+ const Value *RHS,
+ const Instruction &CxtI) const {
// If the LHS is negative and the RHS is non-negative, no unsigned wrap.
KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, &CxtI);
KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, &CxtI);
@@ -1180,7 +1182,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Constant *CI =
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
+ willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new, smaller add.
Value *NewAdd =
Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv");
@@ -1197,7 +1199,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (LHSConv->getOperand(0)->getType() ==
RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ willNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
@@ -1275,10 +1277,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
- // TODO(jingyue): Consider WillNotOverflowSignedAdd and
+ // TODO(jingyue): Consider willNotOverflowSignedAdd and
// willNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {
+ if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHS, RHS, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -1351,7 +1353,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
ConstantExpr::getFPToSI(CFP, LHSIntVal->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSIntVal, CI, I)) {
+ willNotOverflowSignedAdd(LHSIntVal, CI, I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
CI, "addconv");
@@ -1370,7 +1372,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// and if the integer add will not overflow.
if (LHSIntVal->getType() == RHSIntVal->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
+ willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
RHSIntVal, "addconv");
@@ -1676,11 +1678,11 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return replaceInstUsesWith(I, Res);
bool Changed = false;
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {
+ if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
- if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {
+ if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoUnsignedWrap(true);
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 82dc88f1b3ad..4227b2d01be8 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -764,7 +764,7 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
}
/// Fold (icmp)&(icmp) if possible.
-Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
@@ -943,7 +943,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
/// Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of instcombine, this returns
/// a Value which should already be inserted into the function.
-Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+Value *InstCombiner::foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
@@ -1126,8 +1126,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
ICmpInst *ICmp0 = dyn_cast<ICmpInst>(Cast0Src);
ICmpInst *ICmp1 = dyn_cast<ICmpInst>(Cast1Src);
if (ICmp0 && ICmp1) {
- Value *Res = LogicOpc == Instruction::And ? FoldAndOfICmps(ICmp0, ICmp1)
- : FoldOrOfICmps(ICmp0, ICmp1, &I);
+ Value *Res = LogicOpc == Instruction::And ? foldAndOfICmps(ICmp0, ICmp1)
+ : foldOrOfICmps(ICmp0, ICmp1, &I);
if (Res)
return CastInst::Create(CastOpcode, Res, DestTy);
return nullptr;
@@ -1138,8 +1138,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
FCmpInst *FCmp0 = dyn_cast<FCmpInst>(Cast0Src);
FCmpInst *FCmp1 = dyn_cast<FCmpInst>(Cast1Src);
if (FCmp0 && FCmp1) {
- Value *Res = LogicOpc == Instruction::And ? FoldAndOfFCmps(FCmp0, FCmp1)
- : FoldOrOfFCmps(FCmp0, FCmp1);
+ Value *Res = LogicOpc == Instruction::And ? foldAndOfFCmps(FCmp0, FCmp1)
+ : foldOrOfFCmps(FCmp0, FCmp1);
if (Res)
return CastInst::Create(CastOpcode, Res, DestTy);
return nullptr;
@@ -1425,7 +1425,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
if (LHS && RHS)
- if (Value *Res = FoldAndOfICmps(LHS, RHS))
+ if (Value *Res = foldAndOfICmps(LHS, RHS))
return replaceInstUsesWith(I, Res);
// TODO: Make this recursive; it's a little tricky because an arbitrary
@@ -1433,18 +1433,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
Value *X, *Y;
if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = FoldAndOfICmps(LHS, Cmp))
+ if (Value *Res = foldAndOfICmps(LHS, Cmp))
return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = FoldAndOfICmps(LHS, Cmp))
+ if (Value *Res = foldAndOfICmps(LHS, Cmp))
return replaceInstUsesWith(I, Builder->CreateAnd(Res, X));
}
if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = FoldAndOfICmps(Cmp, RHS))
+ if (Value *Res = foldAndOfICmps(Cmp, RHS))
return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = FoldAndOfICmps(Cmp, RHS))
+ if (Value *Res = foldAndOfICmps(Cmp, RHS))
return replaceInstUsesWith(I, Builder->CreateAnd(Res, X));
}
}
@@ -1452,7 +1452,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// If and'ing two fcmp, try combine them into one.
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
- if (Value *Res = FoldAndOfFCmps(LHS, RHS))
+ if (Value *Res = foldAndOfFCmps(LHS, RHS))
return replaceInstUsesWith(I, Res);
if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
@@ -1589,7 +1589,7 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D,
}
/// Fold (icmp)|(icmp) if possible.
-Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Instruction *CxtI) {
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
@@ -1846,7 +1846,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
/// Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of instcombine, this returns
/// a Value which should already be inserted into the function.
-Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
+Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
@@ -2197,7 +2197,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
if (LHS && RHS)
- if (Value *Res = FoldOrOfICmps(LHS, RHS, &I))
+ if (Value *Res = foldOrOfICmps(LHS, RHS, &I))
return replaceInstUsesWith(I, Res);
// TODO: Make this recursive; it's a little tricky because an arbitrary
@@ -2205,18 +2205,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *X, *Y;
if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I))
+ if (Value *Res = foldOrOfICmps(LHS, Cmp, &I))
return replaceInstUsesWith(I, Builder->CreateOr(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I))
+ if (Value *Res = foldOrOfICmps(LHS, Cmp, &I))
return replaceInstUsesWith(I, Builder->CreateOr(Res, X));
}
if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I))
+ if (Value *Res = foldOrOfICmps(Cmp, RHS, &I))
return replaceInstUsesWith(I, Builder->CreateOr(Res, Y));
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I))
+ if (Value *Res = foldOrOfICmps(Cmp, RHS, &I))
return replaceInstUsesWith(I, Builder->CreateOr(Res, X));
}
}
@@ -2224,7 +2224,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
- if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ if (Value *Res = foldOrOfFCmps(LHS, RHS))
return replaceInstUsesWith(I, Res);
if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
@@ -2320,6 +2320,24 @@ static Instruction *foldXorToXor(BinaryOperator &I) {
return nullptr;
}
+Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+ if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
+ }
+ }
+
+ return nullptr;
+}
+
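The new foldXorOfICmps leans on the 3-bit predicate encoding behind getICmpCode, where (by the usual convention) bit 4 means ">", bit 2 means "==", and bit 1 means "<"; xor-ing two masks yields the mask of the xor-ed comparison. A standalone sketch of that invariant (illustrative only, assuming this encoding):

    #include <cassert>

    // Exactly one outcome bit holds for any pair (a, b).
    int rel(int a, int b) { return a > b ? 4 : a == b ? 2 : 1; }

    // A predicate mask is true when its bit for the actual outcome is set.
    bool eval(int Mask, int a, int b) { return (Mask & rel(a, b)) != 0; }

    int main() {
      // (icmp1 A, B) ^ (icmp2 A, B) behaves as the icmp whose code is
      // code(icmp1) ^ code(icmp2), for every pair of predicate masks.
      for (int P = 0; P < 8; ++P)
        for (int Q = 0; Q < 8; ++Q)
          for (int a = -2; a <= 2; ++a)
            for (int b = -2; b <= 2; ++b)
              assert((eval(P, a, b) ^ eval(Q, a, b)) == eval(P ^ Q, a, b));
      return 0;
    }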
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -2579,23 +2597,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
match(Op1, m_Not(m_Specific(A))))
return BinaryOperator::CreateNot(Builder->CreateAnd(A, B));
- // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
- if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
- if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
- if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
- if (LHS->getOperand(0) == RHS->getOperand(1) &&
- LHS->getOperand(1) == RHS->getOperand(0))
- LHS->swapOperands();
- if (LHS->getOperand(0) == RHS->getOperand(0) &&
- LHS->getOperand(1) == RHS->getOperand(1)) {
- Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
- unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
- bool isSigned = LHS->isSigned() || RHS->isSigned();
- return replaceInstUsesWith(I,
- getNewICmpValue(isSigned, Code, Op0, Op1,
- Builder));
- }
- }
+ if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (Value *V = foldXorOfICmps(LHS, RHS))
+ return replaceInstUsesWith(I, V);
if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
return CastedXor;
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 001a4bcf16f3..f4bf5221f6a2 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -585,6 +585,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
return CastInst::CreateIntegerCast(Shift, DestTy, false);
}
+ // FIXME: We should canonicalize to zext/trunc and remove this transform.
// Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type
// conversion.
// It works because bits coming from sign extension have the same value as
@@ -595,18 +596,24 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Value *SExt = cast<Instruction>(Src)->getOperand(0);
const unsigned SExtSize = SExt->getType()->getPrimitiveSizeInBits();
const unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+ const unsigned CISize = CI.getType()->getPrimitiveSizeInBits();
+ const unsigned MaxAmt = SExtSize - std::max(CISize, ASize);
unsigned ShiftAmt = Cst->getZExtValue();
+
// This optimization can only be performed when zero bits generated by
// the original lshr aren't pulled into the value after truncation, so we
// can only shift by values no larger than the number of extension bits.
// FIXME: Instead of bailing when the shift is too large, use and to clear
// the extra bits.
- if (SExt->hasOneUse() && ShiftAmt <= SExtSize - ASize) {
- // If shifting by the size of the original value in bits or more, it is
- // being filled with the sign bit, so shift by ASize-1 to avoid ub.
- Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1));
- Shift->takeName(Src);
- return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
+ if (ShiftAmt <= MaxAmt) {
+ if (CISize == ASize)
+ return BinaryOperator::CreateAShr(A, ConstantInt::get(CI.getType(),
+ std::min(ShiftAmt, ASize - 1)));
+ if (SExt->hasOneUse()) {
+ Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1));
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
+ }
}
}
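Why the ashr rewrite is sound: every bit the lshr shifts in comes from the sign extension, so (within the extension bits) an arithmetic shift of the narrow value truncates to the same result. A standalone check at i8/i16 (illustrative widths only; the min(ShiftAmt, ASize - 1) clamp mirrors the code above):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      for (int A = -128; A <= 127; ++A) {
        int16_t Wide = static_cast<int8_t>(A); // sext i8 -> i16
        for (unsigned C = 0; C <= 8; ++C) {    // up to 8 extension bits
          int8_t ViaLshr =
              static_cast<int8_t>(static_cast<uint16_t>(Wide) >> C);
          // >> on a negative int is arithmetic on mainstream compilers.
          int8_t ViaAshr = static_cast<int8_t>(A >> std::min(C, 7u));
          assert(ViaLshr == ViaAshr);
        }
      }
      return 0;
    }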
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 60ed4057cedd..6492eaedae9c 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3506,7 +3506,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
// We can strength reduce this signed add into a regular add if we can prove
// that it will never overflow.
if (OCF == OCF_SIGNED_ADD)
- if (WillNotOverflowSignedAdd(LHS, RHS, OrigI))
+ if (willNotOverflowSignedAdd(LHS, RHS, OrigI))
return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(),
true);
break;
@@ -3519,11 +3519,11 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
return SetResult(LHS, Builder->getFalse(), false);
if (OCF == OCF_SIGNED_SUB) {
- if (WillNotOverflowSignedSub(LHS, RHS, OrigI))
+ if (willNotOverflowSignedSub(LHS, RHS, OrigI))
return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(),
true);
} else {
- if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI))
+ if (willNotOverflowUnsignedSub(LHS, RHS, OrigI))
return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(),
true);
}
@@ -3553,7 +3553,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
return SetResult(LHS, Builder->getFalse(), false);
if (OCF == OCF_SIGNED_MUL)
- if (WillNotOverflowSignedMul(LHS, RHS, OrigI))
+ if (willNotOverflowSignedMul(LHS, RHS, OrigI))
return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(),
true);
break;
@@ -4260,6 +4260,80 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne));
}
+/// Integer compare with boolean values can always be turned into bitwise ops.
+static Instruction *canonicalizeICmpBool(ICmpInst &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *A = I.getOperand(0), *B = I.getOperand(1);
+ assert(A->getType()->getScalarType()->isIntegerTy(1) && "Bools only");
+
+ // A boolean compared to true/false can be simplified to Op0/true/false in
+ // 14 out of the 20 (10 predicates * 2 constants) possible combinations.
+ // Cases not handled by InstSimplify are always 'not' of Op0.
+ if (match(B, m_Zero())) {
+ switch (I.getPredicate()) {
+ case CmpInst::ICMP_EQ: // A == 0 -> !A
+ case CmpInst::ICMP_ULE: // A <=u 0 -> !A
+ case CmpInst::ICMP_SGE: // A >=s 0 -> !A
+ return BinaryOperator::CreateNot(A);
+ default:
+ llvm_unreachable("ICmp i1 X, C not simplified as expected.");
+ }
+ } else if (match(B, m_One())) {
+ switch (I.getPredicate()) {
+ case CmpInst::ICMP_NE: // A != 1 -> !A
+ case CmpInst::ICMP_ULT: // A <u 1 -> !A
+ case CmpInst::ICMP_SGT: // A >s -1 -> !A
+ return BinaryOperator::CreateNot(A);
+ default:
+ llvm_unreachable("ICmp i1 X, C not simplified as expected.");
+ }
+ }
+
+ switch (I.getPredicate()) {
+ default:
+ llvm_unreachable("Invalid icmp instruction!");
+ case ICmpInst::ICMP_EQ:
+ // icmp eq i1 A, B -> ~(A ^ B)
+ return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+
+ case ICmpInst::ICMP_NE:
+ // icmp ne i1 A, B -> A ^ B
+ return BinaryOperator::CreateXor(A, B);
+
+ case ICmpInst::ICMP_UGT:
+ // icmp ugt -> icmp ult
+ std::swap(A, B);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_ULT:
+ // icmp ult i1 A, B -> ~A & B
+ return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
+
+ case ICmpInst::ICMP_SGT:
+ // icmp sgt -> icmp slt
+ std::swap(A, B);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_SLT:
+ // icmp slt i1 A, B -> A & ~B
+ return BinaryOperator::CreateAnd(Builder.CreateNot(B), A);
+
+ case ICmpInst::ICMP_UGE:
+ // icmp uge -> icmp ule
+ std::swap(A, B);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_ULE:
+ // icmp ule i1 A, B -> ~A | B
+ return BinaryOperator::CreateOr(Builder.CreateNot(A), B);
+
+ case ICmpInst::ICMP_SGE:
+ // icmp sge -> icmp sle
+ std::swap(A, B);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_SLE:
+ // icmp sle i1 A, B -> A | ~B
+ return BinaryOperator::CreateOr(Builder.CreateNot(B), A);
+ }
+}
+
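The tables in canonicalizeICmpBool can be validated exhaustively; the one subtlety is that a signed i1 reads 'true' as -1. A standalone truth-table check (illustrative only):

    #include <cassert>

    int main() {
      for (int A = 0; A <= 1; ++A)
        for (int B = 0; B <= 1; ++B) {
          assert((A == B) == !(A ^ B));   // icmp eq  -> not(xor)
          assert((A != B) == (A ^ B));    // icmp ne  -> xor
          assert((A < B) == (!A & B));    // icmp ult -> ~A & B
          assert((A <= B) == (!A | B));   // icmp ule -> ~A | B
          int SA = A ? -1 : 0, SB = B ? -1 : 0; // signed view of i1
          assert((SA < SB) == (A & !B));  // icmp slt -> A & ~B
          assert((SA <= SB) == (A | !B)); // icmp sle -> A | ~B
        }
      return 0;
    }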
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -4297,49 +4371,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
- Type *Ty = Op0->getType();
-
- // icmp's with boolean values can always be turned into bitwise operations
- if (Ty->getScalarType()->isIntegerTy(1)) {
- switch (I.getPredicate()) {
- default: llvm_unreachable("Invalid icmp instruction!");
- case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B)
- Value *Xor = Builder->CreateXor(Op0, Op1, I.getName() + "tmp");
- return BinaryOperator::CreateNot(Xor);
- }
- case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B
- return BinaryOperator::CreateXor(Op0, Op1);
-
- case ICmpInst::ICMP_UGT:
- std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
- LLVM_FALLTHROUGH;
- case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B
- Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp");
- return BinaryOperator::CreateAnd(Not, Op1);
- }
- case ICmpInst::ICMP_SGT:
- std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
- LLVM_FALLTHROUGH;
- case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
- Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp");
- return BinaryOperator::CreateAnd(Not, Op0);
- }
- case ICmpInst::ICMP_UGE:
- std::swap(Op0, Op1); // Change icmp uge -> icmp ule
- LLVM_FALLTHROUGH;
- case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
- Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp");
- return BinaryOperator::CreateOr(Not, Op1);
- }
- case ICmpInst::ICMP_SGE:
- std::swap(Op0, Op1); // Change icmp sge -> icmp sle
- LLVM_FALLTHROUGH;
- case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
- Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp");
- return BinaryOperator::CreateOr(Not, Op0);
- }
- }
- }
+ if (Op0->getType()->getScalarType()->isIntegerTy(1))
+ if (Instruction *Res = canonicalizeICmpBool(I, *Builder))
+ return Res;
if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I))
return NewICmp;
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index f88a2c6acc3f..6829be86885b 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -275,11 +275,7 @@ public:
Instruction *visitSDiv(BinaryOperator &I);
Instruction *visitFDiv(BinaryOperator &I);
Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted);
- Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
- Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
Instruction *visitAnd(BinaryOperator &I);
- Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);
- Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A,
Value *B, Value *C);
Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A,
@@ -410,18 +406,24 @@ private:
bool DoTransform = true);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
- bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI) {
+ bool willNotOverflowSignedAdd(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI) const {
return computeOverflowForSignedAdd(LHS, RHS, &CxtI) ==
OverflowResult::NeverOverflows;
};
- bool willNotOverflowUnsignedAdd(Value *LHS, Value *RHS, Instruction &CxtI) {
+ bool willNotOverflowUnsignedAdd(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI) const {
return computeOverflowForUnsignedAdd(LHS, RHS, &CxtI) ==
OverflowResult::NeverOverflows;
};
- bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
- bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
- bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI);
- bool willNotOverflowUnsignedMul(Value *LHS, Value *RHS, Instruction &CxtI) {
+ bool willNotOverflowSignedSub(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI) const;
+ bool willNotOverflowUnsignedSub(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI) const;
+ bool willNotOverflowSignedMul(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI) const;
+ bool willNotOverflowUnsignedMul(const Value *LHS, const Value *RHS,
+ const Instruction &CxtI) const {
return computeOverflowForUnsignedMul(LHS, RHS, &CxtI) ==
OverflowResult::NeverOverflows;
};
@@ -445,6 +447,12 @@ private:
Instruction::CastOps isEliminableCastPair(const CastInst *CI1,
const CastInst *CI2);
+ Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);
+ Value *foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
+ Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+
public:
/// \brief Inserts an instruction \p New before instruction \p Old
///
@@ -523,29 +531,31 @@ public:
return nullptr; // Don't do anything with FI
}
- void computeKnownBits(Value *V, KnownBits &Known,
- unsigned Depth, Instruction *CxtI) const {
+ void computeKnownBits(const Value *V, KnownBits &Known,
+ unsigned Depth, const Instruction *CxtI) const {
llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT);
}
- KnownBits computeKnownBits(Value *V, unsigned Depth,
- Instruction *CxtI) const {
+ KnownBits computeKnownBits(const Value *V, unsigned Depth,
+ const Instruction *CxtI) const {
return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT);
}
- bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0,
- Instruction *CxtI = nullptr) const {
+ bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0,
+ const Instruction *CxtI = nullptr) const {
return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT);
}
- unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0,
- Instruction *CxtI = nullptr) const {
+ unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0,
+ const Instruction *CxtI = nullptr) const {
return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT);
}
- OverflowResult computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
- const Instruction *CxtI) {
+ OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
+ const Value *RHS,
+ const Instruction *CxtI) const {
return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT);
}
- OverflowResult computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
- const Instruction *CxtI) {
+ OverflowResult computeOverflowForUnsignedAdd(const Value *LHS,
+ const Value *RHS,
+ const Instruction *CxtI) const {
return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT);
}
OverflowResult computeOverflowForSignedAdd(const Value *LHS,
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 2a35259f2103..fc13854f8fe7 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -132,8 +132,9 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) {
/// \brief Return true if we can prove that:
/// (mul LHS, RHS) === (mul nsw LHS, RHS)
-bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
- Instruction &CxtI) {
+bool InstCombiner::willNotOverflowSignedMul(const Value *LHS,
+ const Value *RHS,
+ const Instruction &CxtI) const {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
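The sign-bit rule can be spot-checked at i8: counting, as ComputeNumSignBits does, how many top bits equal the sign bit, a combined count of at least BitWidth + 2 guarantees the product fits. A standalone verifier (the signBits helper is a hypothetical stand-in for ComputeNumSignBits):

    #include <cassert>
    #include <cstdint>

    // Leading bits equal to the sign bit, including the sign bit itself.
    int signBits(int8_t V) {
      unsigned U = static_cast<uint8_t>(V), Sign = (U >> 7) & 1;
      int N = 1;
      for (int I = 6; I >= 0 && ((U >> I) & 1) == Sign; --I)
        ++N;
      return N;
    }

    int main() {
      // BitWidth + 2 == 10 combined sign bits -> i8 multiply cannot overflow.
      for (int L = -128; L <= 127; ++L)
        for (int R = -128; R <= 127; ++R)
          if (signBits(L) + signBits(R) >= 10)
            assert(L * R >= -128 && L * R <= 127);
      return 0;
    }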
@@ -384,7 +385,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
Constant *CI =
ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
if (ConstantExpr::getSExt(CI, I.getType()) == Op1C &&
- WillNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
+ willNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
// Insert the new, smaller mul.
Value *NewMul =
Builder->CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
@@ -401,7 +402,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (Op0Conv->getOperand(0)->getType() ==
Op1Conv->getOperand(0)->getType() &&
(Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
- WillNotOverflowSignedMul(Op0Conv->getOperand(0),
+ willNotOverflowSignedMul(Op0Conv->getOperand(0),
Op1Conv->getOperand(0), I)) {
// Insert the new integer mul.
Value *NewMul = Builder->CreateNSWMul(
@@ -447,7 +448,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) {
+ if (!I.hasNoSignedWrap() && willNotOverflowSignedMul(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index d8f8a58a5fdf..c4f450949e6d 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -606,7 +606,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (unsigned Count = replaceDominatedUsesWith(
CondInst, TorF, DT, BasicBlockEdge(Pred, BB))) {
Changed = true;
- NumCSECVP = NumCSECVP + Count;
+ NumCSECVP += Count;
}
}
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c04646eed49a..0490d93f6455 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -2057,7 +2057,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) {
// If we failed insertion, make sure we remove the instruction.
DEBUG(verifyRemoved(PREInstr));
- delete PREInstr;
+ PREInstr->deleteValue();
return false;
}
}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index ae353ea44595..ada22ae38eb8 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -833,15 +833,13 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
CondBr->eraseFromParent();
if (CondCmp->use_empty())
CondCmp->eraseFromParent();
- else if (CondCmp->getParent() == BB) {
- // If the fact we just learned is true for all uses of the
- // condition, replace it with a constant value
- auto *CI = Ret == LazyValueInfo::True ?
- ConstantInt::getTrue(CondCmp->getType()) :
- ConstantInt::getFalse(CondCmp->getType());
- CondCmp->replaceAllUsesWith(CI);
- CondCmp->eraseFromParent();
- }
+ // TODO: We can safely replace *some* uses of the CondInst if it has
+ // exactly one value as returned by LVI. RAUW is incorrect in the
+ // presence of guards and assumes that use `Cond`: we rely on the
+ // guards/assumes to reason about the value of `Cond` at the end of
+ // the block, but RAUW unconditionally replaces all uses, including
+ // the guards/assumes themselves and the uses before the guard/assume.
return true;
}
@@ -1327,14 +1325,13 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
CondInst->eraseFromParent();
- else if (OnlyVal && OnlyVal != MultipleVal &&
- CondInst->getParent() == BB) {
- // If we just learned Cond is the same value for all uses of the
- // condition, replace it with a constant value
- CondInst->replaceAllUsesWith(OnlyVal);
- if (!CondInst->mayHaveSideEffects())
- CondInst->eraseFromParent();
- }
+ // TODO: We can safely replace *some* uses of the CondInst if it has
+ // exactly one value as returned by LVI. RAUW is incorrect in the
+ // presence of guards and assumes that use `Cond`: we rely on the
+ // guards/assumes to reason about the value of `Cond` at the end of
+ // the block, but RAUW unconditionally replaces all uses, including
+ // the guards/assumes themselves and the uses before the guard/assume.
}
return true;
}
@@ -1909,7 +1906,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
{BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
ValueMapping[&*BI] = IV;
if (!New->mayHaveSideEffects()) {
- delete New;
+ New->deleteValue();
New = nullptr;
}
} else {
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 02215d3450c2..494cbc61bc9c 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -228,7 +228,7 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
L.Load->replaceAllUsesWith(V);
}
- NumLoadsCombined = NumLoadsCombined + Loads.size();
+ NumLoadsCombined += Loads.size();
return true;
}
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index cb6223b070a6..97337ea5ba62 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -110,6 +110,15 @@ private:
bool HasMemset;
bool HasMemsetPattern;
bool HasMemcpy;
+ /// Return code for isLegalStore()
+ enum LegalStoreKind {
+ None = 0,
+ Memset,
+ MemsetPattern,
+ Memcpy,
+ DontUse // Dummy retval never to be used. Allows catching errors in retval
+ // handling.
+ };
/// \name Countable Loop Idiom Handling
/// @{
@@ -119,8 +128,7 @@ private:
SmallVectorImpl<BasicBlock *> &ExitBlocks);
void collectStores(BasicBlock *BB);
- bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern,
- bool &ForMemcpy);
+ LegalStoreKind isLegalStore(StoreInst *SI);
bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
bool ForMemset);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
@@ -343,20 +351,20 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
}
-bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
- bool &ForMemsetPattern, bool &ForMemcpy) {
+LoopIdiomRecognize::LegalStoreKind
+LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
// Don't touch volatile stores.
if (!SI->isSimple())
- return false;
+ return LegalStoreKind::None;
// Don't convert stores of non-integral pointer types to memsets (which stores
// integers).
if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
- return false;
+ return LegalStoreKind::None;
// Avoid merging nontemporal stores.
if (SI->getMetadata(LLVMContext::MD_nontemporal))
- return false;
+ return LegalStoreKind::None;
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
@@ -364,7 +372,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
// Reject stores that are so large that they overflow an unsigned.
uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
- return false;
+ return LegalStoreKind::None;
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided store. If we have something else, it's a
@@ -372,11 +380,11 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
const SCEVAddRecExpr *StoreEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
- return false;
+ return LegalStoreKind::None;
// Check to see if we have a constant stride.
if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
- return false;
+ return LegalStoreKind::None;
// See if the store can be turned into a memset.
@@ -394,15 +402,13 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
// promote the memset.
CurLoop->isLoopInvariant(SplatValue)) {
// It looks like we can use SplatValue.
- ForMemset = true;
- return true;
+ return LegalStoreKind::Memset;
} else if (HasMemsetPattern &&
// Don't create memset_pattern16s with address spaces.
StorePtr->getType()->getPointerAddressSpace() == 0 &&
(PatternValue = getMemSetPatternValue(StoredVal, DL))) {
// It looks like we can use PatternValue!
- ForMemsetPattern = true;
- return true;
+ return LegalStoreKind::MemsetPattern;
}
// Otherwise, see if the store can be turned into a memcpy.
@@ -412,12 +418,12 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
APInt Stride = getStoreStride(StoreEv);
unsigned StoreSize = getStoreSizeInBytes(SI, DL);
if (StoreSize != Stride && StoreSize != -Stride)
- return false;
+ return LegalStoreKind::None;
// The store must be feeding a non-volatile load.
LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
if (!LI || !LI->isSimple())
- return false;
+ return LegalStoreKind::None;
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided load. If we have something else, it's a
@@ -425,18 +431,17 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
const SCEVAddRecExpr *LoadEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
- return false;
+ return LegalStoreKind::None;
// The store and load must share the same stride.
if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
- return false;
+ return LegalStoreKind::None;
// Success. This store can be converted into a memcpy.
- ForMemcpy = true;
- return true;
+ return LegalStoreKind::Memcpy;
}
// This store can't be transformed into a memset/memcpy.
- return false;
+ return LegalStoreKind::None;
}
void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
@@ -448,24 +453,28 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
if (!SI)
continue;
- bool ForMemset = false;
- bool ForMemsetPattern = false;
- bool ForMemcpy = false;
// Make sure this is a strided store with a constant stride.
- if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy))
- continue;
-
- // Save the store locations.
- if (ForMemset) {
+ switch (isLegalStore(SI)) {
+ case LegalStoreKind::None:
+ // Nothing to do
+ break;
+ case LegalStoreKind::Memset: {
// Find the base pointer.
Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
StoreRefsForMemset[Ptr].push_back(SI);
- } else if (ForMemsetPattern) {
+ } break;
+ case LegalStoreKind::MemsetPattern: {
// Find the base pointer.
Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
StoreRefsForMemsetPattern[Ptr].push_back(SI);
- } else if (ForMemcpy)
+ } break;
+ case LegalStoreKind::Memcpy:
StoreRefsForMemcpy.push_back(SI);
+ break;
+ default:
+ assert(false && "unhandled return value");
+ break;
+ }
}
}
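For orientation, the three non-None store kinds classify loops of roughly these shapes (conceptual C++ sketches, not taken from the patch; the real legality checks on strides and SCEVs are the ones above):

    // Memset: the stored value splats to a single repeated byte.
    void zeroFill(char *A, int N) {
      for (int I = 0; I < N; ++I)
        A[I] = 0; // -> memset(A, 0, N)
    }

    // MemsetPattern: a repeating multi-byte constant; lowered via
    // memset_pattern16 where the target libc provides it.
    void patternFill(int *A, int N) {
      for (int I = 0; I < N; ++I)
        A[I] = 0x01020304;
    }

    // Memcpy: the store is fed by a load with a matching stride.
    void copyLoop(char *A, const char *B, int N) {
      for (int I = 0; I < N; ++I)
        A[I] = B[I]; // -> memcpy(A, B, N)
    }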
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 0ce604429326..32fd3da465fe 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -58,12 +58,30 @@ using namespace llvm;
namespace {
class LoopPredication {
+ /// Represents an induction variable check:
+ /// icmp Pred, <induction variable>, <loop invariant limit>
+ struct LoopICmp {
+ ICmpInst::Predicate Pred;
+ const SCEVAddRecExpr *IV;
+ const SCEV *Limit;
+ LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
+ const SCEV *Limit)
+ : Pred(Pred), IV(IV), Limit(Limit) {}
+ LoopICmp() {}
+ };
+
ScalarEvolution *SE;
Loop *L;
const DataLayout *DL;
BasicBlock *Preheader;
+ Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);
+
+ Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder,
+ ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+ Instruction *InsertAt);
+
Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
IRBuilder<> &Builder);
bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
@@ -116,16 +134,10 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
return getLoopPassPreservedAnalyses();
}
-/// If ICI can be widened to a loop invariant condition emits the loop
-/// invariant condition in the loop preheader and return it, otherwise
-/// returns None.
-Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
- SCEVExpander &Expander,
- IRBuilder<> &Builder) {
- DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
- DEBUG(ICI->dump());
-
+Optional<LoopPredication::LoopICmp>
+LoopPredication::parseLoopICmp(ICmpInst *ICI) {
ICmpInst::Predicate Pred = ICI->getPredicate();
+
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
const SCEV *LHSS = SE->getSCEV(LHS);
@@ -135,17 +147,54 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
if (isa<SCEVCouldNotCompute>(RHSS))
return None;
- // Canonicalize RHS to be loop invariant bound, LHS - a loop computable index
+ // Canonicalize so RHS is the loop-invariant bound and LHS the loop-computable IV.
if (SE->isLoopInvariant(LHSS, L)) {
std::swap(LHS, RHS);
std::swap(LHSS, RHSS);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
- if (!SE->isLoopInvariant(RHSS, L) || !isSafeToExpand(RHSS, *SE))
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHSS);
+ if (!AR || AR->getLoop() != L)
return None;
- const SCEVAddRecExpr *IndexAR = dyn_cast<SCEVAddRecExpr>(LHSS);
- if (!IndexAR || IndexAR->getLoop() != L)
+ return LoopICmp(Pred, AR, RHSS);
+}
+
+Value *LoopPredication::expandCheck(SCEVExpander &Expander,
+ IRBuilder<> &Builder,
+ ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS, Instruction *InsertAt) {
+ Type *Ty = LHS->getType();
+ assert(Ty == RHS->getType() && "expandCheck operands have different types?");
+ Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt);
+ Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt);
+ return Builder.CreateICmp(Pred, LHSV, RHSV);
+}
+
+/// If ICI can be widened to a loop-invariant condition, emits that
+/// condition in the loop preheader and returns it; otherwise returns
+/// None.
+Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
+ SCEVExpander &Expander,
+ IRBuilder<> &Builder) {
+ DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
+ DEBUG(ICI->dump());
+
+ auto RangeCheck = parseLoopICmp(ICI);
+ if (!RangeCheck) {
+ DEBUG(dbgs() << "Failed to parse the range check condition!\n");
+ return None;
+ }
+
+ ICmpInst::Predicate Pred = RangeCheck->Pred;
+ const SCEVAddRecExpr *IndexAR = RangeCheck->IV;
+ const SCEV *RHSS = RangeCheck->Limit;
+
+ auto CanExpand = [this](const SCEV *S) {
+ return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
+ };
+ if (!CanExpand(RHSS))
return None;
DEBUG(dbgs() << "IndexAR: ");
@@ -170,17 +219,13 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
DEBUG(dbgs() << "NewLHSS: ");
DEBUG(NewLHSS->dump());
- if (!SE->isLoopInvariant(NewLHSS, L) || !isSafeToExpand(NewLHSS, *SE))
+ if (!CanExpand(NewLHSS))
return None;
DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n");
- Type *Ty = LHS->getType();
Instruction *InsertAt = Preheader->getTerminator();
- assert(Ty == RHS->getType() && "icmp operands have different types?");
- Value *NewLHS = Expander.expandCodeFor(NewLHSS, Ty, InsertAt);
- Value *NewRHS = Expander.expandCodeFor(RHSS, Ty, InsertAt);
- return Builder.CreateICmp(Pred, NewLHS, NewRHS);
+ return expandCheck(Expander, Builder, Pred, NewLHSS, RHSS, InsertAt);
}
bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
@@ -272,6 +317,9 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
if (II->getIntrinsicID() == Intrinsic::experimental_guard)
Guards.push_back(II);
+ if (Guards.empty())
+ return false;
+
SCEVExpander Expander(*SE, *DL, "loop-predication");
bool Changed = false;
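
The parseLoopICmp/expandCheck split above decomposes a guard condition icmp(Pred, IV, Limit) and emits a single loop-invariant check in the preheader. A conceptual, standalone sketch of the widening idea for a unit-stride IV starting at zero; zeroPrefix and its parameters are hypothetical, not the pass's API:

#include <cassert>
#include <cstdio>
#include <vector>

// Original shape: for (i = 0; i < n; ++i) { guard(i < len); a[i] = 0; }
// With a unit-stride IV starting at 0, the guard holds on every
// iteration iff it holds for the last index, so one "preheader" check
// replaces the per-iteration one.
void zeroPrefix(std::vector<int> &a, unsigned n, unsigned len) {
  bool widened = (n == 0) || (n - 1 < len); // loop-invariant check
  assert(widened && "range check would fail inside the loop");
  for (unsigned i = 0; i < n; ++i)
    a[i] = 0; // per-iteration guard eliminated
}

int main() {
  std::vector<int> a(8, 1);
  zeroPrefix(a, 8, static_cast<unsigned>(a.size()));
  std::printf("a[7] = %d\n", a[7]);
  return 0;
}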
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 2ba9265566a8..7312d97f8efe 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -347,7 +347,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// in the map.
ValueMap[Inst] = V;
if (!C->mayHaveSideEffects()) {
- delete C;
+ C->deleteValue();
C = nullptr;
}
} else {
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index bd1f21c69eba..28d94497a3ef 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3805,6 +3805,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
continue;
+ F.canonicalize(*L);
(void)InsertFormula(LU, LUIdx, F);
}
}
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 0e7572f8d2e5..5cfbf6baeaa9 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -30,9 +30,19 @@
/// tracks what operations have a given value number (IE it also tracks the
/// reverse mapping from value number -> operations with that value number), so
/// that it only needs to reprocess the instructions that are affected when
-/// something's value number changes. The rest of the algorithm is devoted to
-/// performing symbolic evaluation, forward propagation, and simplification of
-/// operations based on the value numbers deduced so far.
+/// something's value number changes. The vast majority of complexity and code
+/// in this file is devoted to tracking what value numbers could change for what
+/// instructions when various things happen. The rest of the algorithm is
+/// devoted to performing symbolic evaluation, forward propagation, and
+/// simplification of operations based on the value numbers deduced so far.
+///
+/// In order to make the GVN mostly-complete, we use a technique derived from
+/// "Detection of Redundant Expressions: A Complete and Polynomial-time
+/// Algorithm in SSA" by R.R. Pai. The source of incompleteness in most SSA
+/// based GVN algorithms is related to their inability to detect equivalence
+/// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)).
+/// We resolve this issue by generating the equivalent "phi of ops" form for
+/// each op of phis we see, in a way that only takes polynomial time to resolve.
///
/// We also do not perform elimination by using any published algorithm. All
/// published algorithms are O(Instructions). Instead, we use a technique that
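
The op-of-phis vs. phi-of-ops equivalence the comment describes can be shown in plain C++ with a ternary standing in for a phi node; this is a conceptual sketch only, not LLVM IR or code from the patch:

#include <cassert>

// "Op of phis": an operation applied to two merge points.
int opOfPhis(bool cond, int a, int b, int c, int d) {
  int phi1 = cond ? a : c; // phi(a, c)
  int phi2 = cond ? b : d; // phi(b, d)
  return phi1 + phi2;
}

// Equivalent "phi of ops": merge the per-predecessor results instead.
int phiOfOps(bool cond, int a, int b, int c, int d) {
  return cond ? (a + b) : (c + d); // phi(a+b, c+d)
}

int main() {
  for (bool cond : {false, true})
    assert(opOfPhis(cond, 1, 2, 3, 4) == phiOfOps(cond, 1, 2, 3, 4));
  return 0;
}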
@@ -51,7 +61,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -104,12 +113,14 @@ STATISTIC(NumGVNLeaderChanges, "Number of leader changes");
STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes");
STATISTIC(NumGVNAvoidedSortedLeaderChanges,
"Number of avoided sorted leader changes");
-STATISTIC(NumGVNNotMostDominatingLeader,
- "Number of times a member dominated it's new classes' leader");
STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated");
+STATISTIC(NumGVNPHIOfOpsCreated, "Number of PHI of ops created");
+STATISTIC(NumGVNPHIOfOpsEliminations,
+ "Number of things eliminated using PHI of ops");
DEBUG_COUNTER(VNCounter, "newgvn-vn",
"Controls which instructions are value numbered")
-
+DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi",
+ "Controls which instructions we create phi of ops for")
// Currently store defining access refinement is too slow due to basicaa being
// egregiously slow. This flag lets us keep it working while we work on this
// issue.
@@ -172,10 +183,9 @@ private:
}
}
// See if we really were the root of a component, by seeing if we still have
- // our DFSNumber.
- // If we do, we are the root of the component, and we have completed a
- // component. If we do not,
- // we are not the root of a component, and belong on the component stack.
+ // our DFSNumber. If we do, we are the root of the component, and we have
+ // completed a component. If we do not, we are not the root of a component,
+ // and belong on the component stack.
if (Root.lookup(I) == OurDFS) {
unsigned ComponentID = Components.size();
Components.resize(Components.size() + 1);
@@ -367,6 +377,7 @@ private:
int StoreCount = 0;
};
+struct HashedExpression;
namespace llvm {
template <> struct DenseMapInfo<const Expression *> {
static const Expression *getEmptyKey() {
@@ -379,9 +390,11 @@ template <> struct DenseMapInfo<const Expression *> {
Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable;
return reinterpret_cast<const Expression *>(Val);
}
- static unsigned getHashValue(const Expression *V) {
- return static_cast<unsigned>(V->getHashValue());
+ static unsigned getHashValue(const Expression *E) {
+ return static_cast<unsigned>(E->getHashValue());
}
+ static unsigned getHashValue(const HashedExpression &HE);
+ static bool isEqual(const HashedExpression &LHS, const Expression *RHS);
static bool isEqual(const Expression *LHS, const Expression *RHS) {
if (LHS == RHS)
return true;
@@ -393,6 +406,26 @@ template <> struct DenseMapInfo<const Expression *> {
};
} // end namespace llvm
+// This is just a wrapper around Expression that computes the hash value once at
+// creation time. Hash values for an Expression can't change once they are
+// inserted into the DenseMap (it breaks DenseMap), so they must be immutable at
+// that point anyway.
+struct HashedExpression {
+ const Expression *E;
+ unsigned HashVal;
+ HashedExpression(const Expression *E)
+ : E(E), HashVal(DenseMapInfo<const Expression *>::getHashValue(E)) {}
+};
+
+unsigned
+DenseMapInfo<const Expression *>::getHashValue(const HashedExpression &HE) {
+ return HE.HashVal;
+}
+bool DenseMapInfo<const Expression *>::isEqual(const HashedExpression &LHS,
+ const Expression *RHS) {
+ return isEqual(LHS.E, RHS);
+}
+
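
HashedExpression above computes the hash once at construction so repeated probes never rehash, which is safe because a key's hash must stay stable while it lives in the DenseMap. A generic standard-C++ analogue of the caching idea; HashedPtr is a hypothetical name:

#include <cassert>
#include <cstddef>
#include <functional>
#include <string>

// Pair a pointer with its hash, computed exactly once, so that every
// later table probe reuses the cached value.
template <typename T> struct HashedPtr {
  const T *Ptr;
  std::size_t HashVal;
  explicit HashedPtr(const T *P) : Ptr(P), HashVal(std::hash<T>{}(*P)) {}
};

int main() {
  std::string S = "expression";
  HashedPtr<std::string> H(&S);
  // The cached hash matches what the hasher would recompute.
  assert(H.HashVal == std::hash<std::string>{}(S));
  return 0;
}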
namespace {
class NewGVN {
Function &F;
@@ -430,6 +463,33 @@ class NewGVN {
// Value Mappings.
DenseMap<Value *, CongruenceClass *> ValueToClass;
DenseMap<Value *, const Expression *> ValueToExpression;
+ // Value PHI handling, used to make equivalence between phi(op, op) and
+ // op(phi, phi).
+ // These mappings just store various data that would normally be part of the
+ // IR.
+ DenseSet<const Instruction *> PHINodeUses;
+ // Map a temporary instruction we created to a parent block.
+ DenseMap<const Value *, BasicBlock *> TempToBlock;
+ // Map between the temporary phis we created and the real instructions they
+ // are known equivalent to.
+ DenseMap<const Value *, PHINode *> RealToTemp;
+ // In order to know when we should re-process instructions that have
+ // phi-of-ops, we track the set of expressions that they needed as
+ // leaders. When we discover new leaders for those expressions, we process the
+ // associated phi-of-op instructions again in case they have changed. The
+ // other way they may change is if they had leaders, and those leaders
+ // disappear. However, at the point they have leaders, there are uses of the
+ // relevant operands in the created phi node, and so they will get reprocessed
+ // through the normal user marking we perform.
+ mutable DenseMap<const Value *, SmallPtrSet<Value *, 2>> AdditionalUsers;
+ DenseMap<const Expression *, SmallPtrSet<Instruction *, 2>>
+ ExpressionToPhiOfOps;
+ // Map from basic block to the temporary operations we created
+ DenseMap<const BasicBlock *, SmallVector<PHINode *, 8>> PHIOfOpsPHIs;
+ // Map from temporary operation to MemoryAccess.
+ DenseMap<const Instruction *, MemoryUseOrDef *> TempToMemory;
+ // Set of all temporary instructions we created.
+ DenseSet<Instruction *> AllTempInstructions;
// Mapping from predicate info we used to the instructions we used it with.
// In order to correctly ensure propagation, we must keep track of what
@@ -462,12 +522,19 @@ class NewGVN {
enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique };
DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState;
- enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle };
- mutable DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
+ enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle };
+ mutable DenseMap<const Instruction *, InstCycleState> InstCycleState;
// Expression to class mapping.
using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
ExpressionClassMap ExpressionToClass;
+ // We have a single DeadExpression that represents currently-dead expressions.
+ // For dead expressions we can prove will stay dead, we mark them with
+ // DFS number zero. However, it's possible in the case of phi nodes
+ // for us to assume/prove all arguments are dead during fixpointing.
+ // We use DeadExpression for that case.
+ DeadExpression *SingletonDeadExpression = nullptr;
+
// Which values have changed as a result of leader changes.
SmallPtrSet<Value *, 8> LeaderChanges;
@@ -521,7 +588,8 @@ private:
const Expression *createBinaryExpression(unsigned, Type *, Value *,
Value *) const;
PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge,
- bool &AllConstant) const;
+ bool &OriginalOpsConstant) const;
+ const DeadExpression *createDeadExpression() const;
const VariableExpression *createVariableExpression(Value *) const;
const ConstantExpression *createConstantExpression(Constant *) const;
const Expression *createVariableOrConstant(Value *V) const;
@@ -562,6 +630,9 @@ private:
return CClass;
}
void initializeCongruenceClasses(Function &F);
+ const Expression *makePossiblePhiOfOps(Instruction *, bool,
+ SmallPtrSetImpl<Value *> &);
+ void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue);
// Value number an Instruction or MemoryPhi.
void valueNumberMemoryPhi(MemoryPhi *);
@@ -570,7 +641,8 @@ private:
// Symbolic evaluation.
const Expression *checkSimplificationResults(Expression *, Instruction *,
Value *) const;
- const Expression *performSymbolicEvaluation(Value *) const;
+ const Expression *performSymbolicEvaluation(Value *,
+ SmallPtrSetImpl<Value *> &) const;
const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
Instruction *,
MemoryAccess *) const;
@@ -595,7 +667,7 @@ private:
bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To);
CongruenceClass *getMemoryClass(const MemoryAccess *MA) const;
const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const;
- bool isMemoryAccessTop(const MemoryAccess *) const;
+ bool isMemoryAccessTOP(const MemoryAccess *) const;
// Ranking
unsigned int getRank(const Value *) const;
@@ -619,19 +691,26 @@ private:
void replaceInstruction(Instruction *, Value *);
void markInstructionForDeletion(Instruction *);
void deleteInstructionsInBlock(BasicBlock *);
+ Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const;
// New instruction creation.
void handleNewInstruction(Instruction *){};
// Various instruction touch utilities
+ template <typename Map, typename KeyType, typename Func>
+ void for_each_found(Map &, const KeyType &, Func);
+ template <typename Map, typename KeyType>
+ void touchAndErase(Map &, const KeyType &);
void markUsersTouched(Value *);
void markMemoryUsersTouched(const MemoryAccess *);
void markMemoryDefTouched(const MemoryAccess *);
void markPredicateUsersTouched(Instruction *);
void markValueLeaderChangeTouched(CongruenceClass *CC);
void markMemoryLeaderChangeTouched(CongruenceClass *CC);
+ void markPhiOfOpsChanged(const HashedExpression &HE);
void addPredicateUsers(const PredicateBase *, Instruction *) const;
void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const;
+ void addAdditionalUsers(Value *To, Value *User) const;
// Main loop of value numbering
void iterateTouchedInstructions();
@@ -639,13 +718,18 @@ private:
// Utilities.
void cleanupTables();
std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
- void updateProcessedCount(Value *V);
+ void updateProcessedCount(const Value *V);
void verifyMemoryCongruency() const;
void verifyIterationSettled(Function &F);
void verifyStoreExpressions() const;
- bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const;
+ bool singleReachablePHIPath(SmallPtrSet<const MemoryAccess *, 8> &,
+ const MemoryAccess *, const MemoryAccess *) const;
BasicBlock *getBlockForValue(Value *V) const;
void deleteExpression(const Expression *E) const;
+ MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
+ MemoryAccess *getDefiningAccess(const MemoryAccess *) const;
+ MemoryPhi *getMemoryAccess(const BasicBlock *) const;
+ template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
unsigned InstrToDFSNum(const Value *V) const {
assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses");
return InstrDFS.lookup(V);
@@ -665,8 +749,8 @@ private:
? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst())
: InstrDFS.lookup(MA);
}
- bool isCycleFree(const PHINode *PN) const;
- template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
+ bool isCycleFree(const Instruction *) const;
+ bool isBackedge(BasicBlock *From, BasicBlock *To) const;
// Debug counter info. When verifying, we have to reset the value numbering
// debug counter to the same state it started in to get the same results.
std::pair<int, int> StartingVNCounter;
@@ -694,20 +778,46 @@ bool StoreExpression::equals(const Expression &Other) const {
return true;
}
+// Determine if the edge From->To is a backedge
+bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const {
+ if (From == To)
+ return true;
+ auto *FromDTN = DT->getNode(From);
+ auto *ToDTN = DT->getNode(To);
+ return RPOOrdering.lookup(FromDTN) >= RPOOrdering.lookup(ToDTN);
+}
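
isBackedge above reduces the test to comparing reverse-post-order numbers. A standalone sketch that computes an RPO numbering of a toy CFG by DFS and applies the same comparison; the graph encoding and names are illustrative only:

#include <cstdio>
#include <map>
#include <vector>

using Graph = std::map<int, std::vector<int>>;

// Post-order DFS; reversing the finish order yields an RPO numbering.
static void postorder(const Graph &G, int N, std::map<int, bool> &Seen,
                      std::vector<int> &Out) {
  Seen[N] = true;
  auto It = G.find(N);
  if (It != G.end())
    for (int Succ : It->second)
      if (!Seen[Succ])
        postorder(G, Succ, Seen, Out);
  Out.push_back(N);
}

int main() {
  // CFG: 0 -> 1 -> 2 -> 3, plus the loop edge 2 -> 1.
  Graph G{{0, {1}}, {1, {2}}, {2, {1, 3}}};
  std::map<int, bool> Seen;
  std::vector<int> PO;
  postorder(G, 0, Seen, PO);
  std::map<int, unsigned> RPO;
  for (unsigned I = 0; I < PO.size(); ++I)
    RPO[PO[PO.size() - 1 - I]] = I;
  // Same test as NewGVN::isBackedge: From->To is a backedge when the
  // source does not come earlier than the destination in RPO.
  auto IsBackedge = [&](int From, int To) {
    return From == To || RPO[From] >= RPO[To];
  };
  std::printf("2->1: %d, 1->2: %d\n", (int)IsBackedge(2, 1),
              (int)IsBackedge(1, 2));
  return 0;
}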
+
#ifndef NDEBUG
static std::string getBlockName(const BasicBlock *B) {
return DOTGraphTraits<const Function *>::getSimpleNodeLabel(B, nullptr);
}
#endif
+// Get a MemoryAccess for an instruction, fake or real.
+MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const {
+ auto *Result = MSSA->getMemoryAccess(I);
+ return Result ? Result : TempToMemory.lookup(I);
+}
+
+// Get a MemoryPhi for a basic block. These are all real.
+MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const {
+ return MSSA->getMemoryAccess(BB);
+}
+
// Get the basic block from an instruction/memory value.
BasicBlock *NewGVN::getBlockForValue(Value *V) const {
- if (auto *I = dyn_cast<Instruction>(V))
- return I->getParent();
- else if (auto *MP = dyn_cast<MemoryPhi>(V))
- return MP->getBlock();
- llvm_unreachable("Should have been able to figure out a block for our value");
- return nullptr;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ auto *Parent = I->getParent();
+ if (Parent)
+ return Parent;
+ Parent = TempToBlock.lookup(V);
+ assert(Parent && "Every fake instruction should have a block");
+ return Parent;
+ }
+
+ auto *MP = dyn_cast<MemoryPhi>(V);
+ assert(MP && "Should have been an instruction or a MemoryPhi");
+ return MP->getBlock();
}
// Delete a definitely dead expression, so it can be reused by the expression
@@ -719,10 +829,9 @@ void NewGVN::deleteExpression(const Expression *E) const {
const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler);
ExpressionAllocator.Deallocate(E);
}
-
PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
- bool &AllConstant) const {
- BasicBlock *PHIBlock = I->getParent();
+ bool &OriginalOpsConstant) const {
+ BasicBlock *PHIBlock = getBlockForValue(I);
auto *PN = cast<PHINode>(I);
auto *E =
new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock);
@@ -731,8 +840,6 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
E->setType(I->getType());
E->setOpcode(I->getOpcode());
- unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock));
-
// NewGVN assumes the operands of a PHI node are in a consistent order across
// PHIs. LLVM doesn't seem to always guarantee this. While we need to fix
// this in LLVM at some point we don't want GVN to find wrong congruences.
@@ -753,18 +860,20 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) {
return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock});
});
-
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
[&](const Use *U) -> Value * {
auto *BB = PN->getIncomingBlock(*U);
- auto *DTN = DT->getNode(BB);
- if (RPOOrdering.lookup(DTN) >= PHIRPO)
- HasBackedge = true;
- AllConstant &= isa<UndefValue>(*U) || isa<Constant>(*U);
-
- // Don't try to transform self-defined phis.
+ HasBackedge = HasBackedge || isBackedge(BB, PHIBlock);
+ OriginalOpsConstant =
+ OriginalOpsConstant && isa<Constant>(*U);
+ // Use nullptr to distinguish between things that were
+ // originally self-defined and those that have an operand
+ // leader that is self-defined.
if (*U == PN)
- return PN;
+ return nullptr;
+ // Things in TOPClass are equivalent to everything.
+ if (ValueToClass.lookup(*U) == TOPClass)
+ return nullptr;
return lookupOperandLeader(*U);
});
return E;
@@ -785,7 +894,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) const {
// whether all members are constant.
std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) {
auto Operand = lookupOperandLeader(O);
- AllConstant &= isa<Constant>(Operand);
+ AllConstant = AllConstant && isa<Constant>(Operand);
return Operand;
});
@@ -848,7 +957,7 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E,
if (CC && CC->getDefiningExpr()) {
if (I)
DEBUG(dbgs() << "Simplified " << *I << " to "
- << " expression " << *V << "\n");
+ << " expression " << *CC->getDefiningExpr() << "\n");
NumGVNOpsSimplified++;
deleteExpression(E);
return CC->getDefiningExpr();
@@ -961,6 +1070,12 @@ NewGVN::createAggregateValueExpression(Instruction *I) const {
llvm_unreachable("Unhandled type of aggregate value operation");
}
+const DeadExpression *NewGVN::createDeadExpression() const {
+ // DeadExpression has no arguments and all DeadExpression's are the same,
+ // so we only need one of them.
+ return SingletonDeadExpression;
+}
+
const VariableExpression *NewGVN::createVariableExpression(Value *V) const {
auto *E = new (ExpressionAllocator) VariableExpression(V);
E->setOpcode(V->getValueID());
@@ -1032,7 +1147,7 @@ bool NewGVN::someEquivalentDominates(const Instruction *Inst,
Value *NewGVN::lookupOperandLeader(Value *V) const {
CongruenceClass *CC = ValueToClass.lookup(V);
if (CC) {
- // Everything in TOP is represneted by undef, as it can be any value.
+ // Everything in TOP is represented by undef, as it can be any value.
// We do have to make sure we get the type right though, so we can't set the
// RepLeader to undef.
if (CC == TOPClass)
@@ -1054,7 +1169,7 @@ const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const {
// Return true if the MemoryAccess is really equivalent to everything. This is
// equivalent to the lattice value "TOP" in most lattices. This is the initial
// state of all MemoryAccesses.
-bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const {
+bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const {
return getMemoryClass(MA) == TOPClass;
}
@@ -1100,7 +1215,7 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
// Unlike loads, we never try to eliminate stores, so we do not check if they
// are simple and avoid value numbering them.
auto *SI = cast<StoreInst>(I);
- auto *StoreAccess = MSSA->getMemoryAccess(SI);
+ auto *StoreAccess = getMemoryAccess(SI);
// Get the expression, if any, for the RHS of the MemoryDef.
const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess();
if (EnableStoreRefinement)
@@ -1108,7 +1223,6 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
// If we bypassed the use-def chains, make sure we add a use.
if (StoreRHS != StoreAccess->getDefiningAccess())
addMemoryUsers(StoreRHS, StoreAccess);
-
StoreRHS = lookupMemoryLeader(StoreRHS);
// If we are defined by ourselves, use the live on entry def.
if (StoreRHS == StoreAccess)
@@ -1137,9 +1251,9 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) {
if ((lookupOperandLeader(LI->getPointerOperand()) ==
lookupOperandLeader(SI->getPointerOperand())) &&
- (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) ==
+ (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) ==
StoreRHS))
- return createVariableExpression(LI);
+ return createStoreExpression(SI, StoreRHS);
}
}
@@ -1241,8 +1355,9 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
// Load of undef is undef.
if (isa<UndefValue>(LoadAddressLeader))
return createConstantExpression(UndefValue::get(LI->getType()));
-
- MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
+ MemoryAccess *OriginalAccess = getMemoryAccess(I);
+ MemoryAccess *DefiningAccess =
+ MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
@@ -1331,6 +1446,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
// operands are equal, because assumes must always be true.
if (CmpInst::isTrueWhenEqual(Predicate)) {
addPredicateUsers(PI, I);
+ addAdditionalUsers(Cmp->getOperand(0), I);
return createVariableOrConstant(FirstOp);
}
}
@@ -1343,6 +1459,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) ||
(!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) {
addPredicateUsers(PI, I);
+ addAdditionalUsers(Cmp->getOperand(0), I);
return createVariableOrConstant(FirstOp);
}
// Handle the special case of floating point.
@@ -1350,6 +1467,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const {
(!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) &&
isa<ConstantFP>(FirstOp) && !cast<ConstantFP>(FirstOp)->isZero()) {
addPredicateUsers(PI, I);
+ addAdditionalUsers(Cmp->getOperand(0), I);
return createConstantExpression(cast<Constant>(FirstOp));
}
}
@@ -1430,34 +1548,33 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From,
return Changed;
}
-// Determine if a phi is cycle-free. That means the values in the phi don't
-// depend on any expressions that can change value as a result of the phi.
-// For example, a non-cycle free phi would be v = phi(0, v+1).
-bool NewGVN::isCycleFree(const PHINode *PN) const {
- // In order to compute cycle-freeness, we do SCC finding on the phi, and see
- // what kind of SCC it ends up in. If it is a singleton, it is cycle-free.
- // If it is not in a singleton, it is only cycle free if the other members are
- // all phi nodes (as they do not compute anything, they are copies). TODO:
- // There are likely a few other intrinsics or expressions that could be
- // included here, but this happens so infrequently already that it is not
- // likely to be worth it.
- auto PCS = PhiCycleState.lookup(PN);
- if (PCS == PCS_Unknown) {
- SCCFinder.Start(PN);
- auto &SCC = SCCFinder.getComponentFor(PN);
+// Determine if an instruction is cycle-free. That means the values in the
+// instruction don't depend on any expressions that can change value as a result
+// of the instruction. For example, a non-cycle free instruction would be v =
+// phi(0, v+1).
+bool NewGVN::isCycleFree(const Instruction *I) const {
+ // In order to compute cycle-freeness, we do SCC finding on the instruction,
+ // and see what kind of SCC it ends up in. If it is a singleton, it is
+ // cycle-free. If it is not in a singleton, it is only cycle free if the
+ // other members are all phi nodes (as they do not compute anything, they are
+ // copies).
+ auto ICS = InstCycleState.lookup(I);
+ if (ICS == ICS_Unknown) {
+ SCCFinder.Start(I);
+ auto &SCC = SCCFinder.getComponentFor(I);
// It's cycle free if its size is 1 or the SCC is *only* phi nodes.
if (SCC.size() == 1)
- PhiCycleState.insert({PN, PCS_CycleFree});
+ InstCycleState.insert({I, ICS_CycleFree});
else {
bool AllPhis =
llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); });
- PCS = AllPhis ? PCS_CycleFree : PCS_Cycle;
+ ICS = AllPhis ? ICS_CycleFree : ICS_Cycle;
for (auto *Member : SCC)
if (auto *MemberPhi = dyn_cast<PHINode>(Member))
- PhiCycleState.insert({MemberPhi, PCS});
+ InstCycleState.insert({MemberPhi, ICS});
}
}
- if (PCS == PCS_Cycle)
+ if (ICS == ICS_Cycle)
return false;
return true;
}
@@ -1467,10 +1584,9 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
// True if one of the incoming phi edges is a backedge.
bool HasBackedge = false;
// All constant tracks the state of whether all the *original* phi operands
- // were constant. This is really shorthand for "this phi cannot cycle due
- // to forward propagation", as any change in value of the phi is guaranteed
- // not to later change the value of the phi.
- // IE it can't be v = phi(undef, v+1)
+ // were constant. This is really shorthand for "this phi cannot cycle due to
+ // forward propagation", as any change in value of the phi is guaranteed not
+ // to later change the value of the phi. IE it can't be v = phi(undef, v+1)
bool AllConstant = true;
auto *E =
cast<PHIExpression>(createPHIExpression(I, HasBackedge, AllConstant));
@@ -1478,8 +1594,16 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
// See if all arguments are the same.
// We track if any were undef because they need special handling.
bool HasUndef = false;
- auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) {
- if (Arg == I)
+ bool CycleFree = isCycleFree(I);
+ auto Filtered = make_filter_range(E->operands(), [&](Value *Arg) {
+ if (Arg == nullptr)
+ return false;
+ // Original self-operands are already eliminated during expression creation.
+ // We can only eliminate value-wise self-operands if it's cycle
+ // free. Otherwise, eliminating the operand can cause our value to change,
+ // which can cause us to not eliminate the operand, which changes the value
+ // back to what it was before, cycling forever.
+ if (CycleFree && Arg == I)
return false;
if (isa<UndefValue>(Arg)) {
HasUndef = true;
@@ -1487,20 +1611,19 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
}
return true;
});
- // If we are left with no operands, it's undef
+ // If we are left with no operands, it's dead.
if (Filtered.begin() == Filtered.end()) {
- DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef"
- << "\n");
+ DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n");
deleteExpression(E);
- return createConstantExpression(UndefValue::get(I->getType()));
+ return createDeadExpression();
}
unsigned NumOps = 0;
Value *AllSameValue = *(Filtered.begin());
++Filtered.begin();
// Can't use std::equal here, sadly, because filter.begin moves.
- if (llvm::all_of(Filtered, [AllSameValue, &NumOps](const Value *V) {
+ if (llvm::all_of(Filtered, [&](Value *Arg) {
++NumOps;
- return V == AllSameValue;
+ return Arg == AllSameValue;
})) {
// In LLVM's non-standard representation of phi nodes, it's possible to have
// phi nodes with cycles (IE dependent on other phis that are .... dependent
@@ -1519,7 +1642,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
// constants, or all operands are ignored but the undef, it also must be
// cycle free.
if (!AllConstant && HasBackedge && NumOps > 0 &&
- !isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I)))
+ !isa<UndefValue>(AllSameValue) && !CycleFree)
return E;
// Only have to check for instructions
@@ -1690,8 +1813,18 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const {
return createExpression(I);
}
+// Return true if V is a value that will always be available (IE can
+// be placed anywhere) in the function. We don't do globals here
+// because they are often worse to put in place.
+// TODO: Separate cost from availability
+static bool alwaysAvailable(Value *V) {
+ return isa<Constant>(V) || isa<Argument>(V);
+}
+
// Substitute and symbolize the value before value numbering.
-const Expression *NewGVN::performSymbolicEvaluation(Value *V) const {
+const Expression *
+NewGVN::performSymbolicEvaluation(Value *V,
+ SmallPtrSetImpl<Value *> &Visited) const {
const Expression *E = nullptr;
if (auto *C = dyn_cast<Constant>(V))
E = createConstantExpression(C);
@@ -1769,12 +1902,39 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V) const {
return E;
}
+// Look up a container in a map, and then call a function for each thing in the
+// found container.
+template <typename Map, typename KeyType, typename Func>
+void NewGVN::for_each_found(Map &M, const KeyType &Key, Func F) {
+ const auto Result = M.find_as(Key);
+ if (Result != M.end())
+ for (typename Map::mapped_type::value_type Mapped : Result->second)
+ F(Mapped);
+}
+
+// Look up a container of values/instructions in a map, and touch all the
+// instructions in the container. Then erase the entry from the map.
+template <typename Map, typename KeyType>
+void NewGVN::touchAndErase(Map &M, const KeyType &Key) {
+ const auto Result = M.find_as(Key);
+ if (Result != M.end()) {
+ for (const typename Map::mapped_type::value_type Mapped : Result->second)
+ TouchedInstructions.set(InstrToDFSNum(Mapped));
+ M.erase(Result);
+ }
+}
+
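
A runnable usage sketch of the touchAndErase shape above, rewritten over std::map so it stands alone; forEachFoundThenErase is a hypothetical analogue, not the LLVM helper:

#include <cstdio>
#include <map>
#include <vector>

// Look a key up in a map of containers, act on every element found,
// then drop the whole entry, mirroring touchAndErase's structure.
template <typename Map, typename Key, typename Func>
void forEachFoundThenErase(Map &M, const Key &K, Func F) {
  auto It = M.find(K);
  if (It == M.end())
    return;
  for (auto &Elem : It->second)
    F(Elem);
  M.erase(It);
}

int main() {
  std::map<int, std::vector<int>> Users{{7, {1, 2, 3}}};
  forEachFoundThenErase(Users, 7, [](int U) { std::printf("touch %d\n", U); });
  std::printf("entries left: %zu\n", Users.size());
  return 0;
}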
+void NewGVN::addAdditionalUsers(Value *To, Value *User) const {
+ AdditionalUsers[To].insert(User);
+}
+
void NewGVN::markUsersTouched(Value *V) {
// Now mark the users as touched.
for (auto *User : V->users()) {
assert(isa<Instruction>(User) && "Use of value not within an instruction?");
TouchedInstructions.set(InstrToDFSNum(User));
}
+ touchAndErase(AdditionalUsers, V);
}
void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const {
@@ -1791,16 +1951,15 @@ void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) {
return;
for (auto U : MA->users())
TouchedInstructions.set(MemoryToDFSNum(U));
- const auto Result = MemoryToUsers.find(MA);
- if (Result != MemoryToUsers.end()) {
- for (auto *User : Result->second)
- TouchedInstructions.set(MemoryToDFSNum(User));
- MemoryToUsers.erase(Result);
- }
+ touchAndErase(MemoryToUsers, MA);
}
// Add I to the set of users of a given predicate.
void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const {
+ // Don't add temporary instructions to the user lists.
+ if (AllTempInstructions.count(I))
+ return;
+
if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
PredicateToUsers[PBranch->Condition].insert(I);
else if (auto *PAssume = dyn_cast<PredicateAssume>(PB))
@@ -1809,12 +1968,7 @@ void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const {
// Touch all the predicates that depend on this instruction.
void NewGVN::markPredicateUsersTouched(Instruction *I) {
- const auto Result = PredicateToUsers.find(I);
- if (Result != PredicateToUsers.end()) {
- for (auto *User : Result->second)
- TouchedInstructions.set(InstrToDFSNum(User));
- PredicateToUsers.erase(Result);
- }
+ touchAndErase(PredicateToUsers, I);
}
// Mark users affected by a memory leader change.
@@ -1856,11 +2010,11 @@ const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const {
assert(!CC->definesNoMemory() && "Can't get next leader if there is none");
if (CC->getStoreCount() > 0) {
if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first))
- return MSSA->getMemoryAccess(NL);
+ return getMemoryAccess(NL);
// Find the store with the minimum DFS number.
auto *V = getMinDFSOfRange<Value>(make_filter_range(
*CC, [&](const Value *V) { return isa<StoreInst>(V); }));
- return MSSA->getMemoryAccess(cast<StoreInst>(V));
+ return getMemoryAccess(cast<StoreInst>(V));
}
assert(CC->getStoreCount() == 0);
@@ -1945,31 +2099,11 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
if (I == OldClass->getNextLeader().first)
OldClass->resetNextLeader();
- // It's possible, though unlikely, for us to discover equivalences such
- // that the current leader does not dominate the old one.
- // This statistic tracks how often this happens.
- // We assert on phi nodes when this happens, currently, for debugging, because
- // we want to make sure we name phi node cycles properly.
- if (isa<Instruction>(NewClass->getLeader()) && NewClass->getLeader() &&
- I != NewClass->getLeader()) {
- auto *IBB = I->getParent();
- auto *NCBB = cast<Instruction>(NewClass->getLeader())->getParent();
- bool Dominated =
- IBB == NCBB && InstrToDFSNum(I) < InstrToDFSNum(NewClass->getLeader());
- Dominated = Dominated || DT->properlyDominates(IBB, NCBB);
- if (Dominated) {
- ++NumGVNNotMostDominatingLeader;
- assert(
- !isa<PHINode>(I) &&
- "New class for instruction should not be dominated by instruction");
- }
- }
+ OldClass->erase(I);
+ NewClass->insert(I);
if (NewClass->getLeader() != I)
NewClass->addPossibleNextLeader({I, InstrToDFSNum(I)});
-
- OldClass->erase(I);
- NewClass->insert(I);
// Handle our special casing of stores.
if (auto *SI = dyn_cast<StoreInst>(I)) {
OldClass->decStoreCount();
@@ -1984,7 +2118,6 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
// If it's a store expression we are using, it means we are not equivalent
// to something earlier.
if (auto *SE = dyn_cast<StoreExpression>(E)) {
- assert(SE->getStoredValue() != NewClass->getLeader());
NewClass->setStoredValue(SE->getStoredValue());
markValueLeaderChangeTouched(NewClass);
// Shift the new class leader to be the store
@@ -2003,7 +2136,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
// instructions before.
// If it's not a memory use, set the MemoryAccess equivalence
- auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I));
+ auto *InstMA = dyn_cast_or_null<MemoryDef>(getMemoryAccess(I));
if (InstMA)
moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass);
ValueToClass[I] = NewClass;
@@ -2035,21 +2168,31 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
}
}
+// For a given expression, mark the phi of ops instructions that could have
+// changed as a result.
+void NewGVN::markPhiOfOpsChanged(const HashedExpression &HE) {
+ touchAndErase(ExpressionToPhiOfOps, HE);
+}
+
// Perform congruence finding on a given value numbering expression.
void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
// This is guaranteed to return something, since it will at least find
// TOP.
- CongruenceClass *IClass = ValueToClass[I];
+ CongruenceClass *IClass = ValueToClass.lookup(I);
assert(IClass && "Should have found a IClass");
// Dead classes should have been eliminated from the mapping.
assert(!IClass->isDead() && "Found a dead class");
- CongruenceClass *EClass;
+ CongruenceClass *EClass = nullptr;
+ HashedExpression HE(E);
if (const auto *VE = dyn_cast<VariableExpression>(E)) {
- EClass = ValueToClass[VE->getVariableValue()];
- } else {
- auto lookupResult = ExpressionToClass.insert({E, nullptr});
+ EClass = ValueToClass.lookup(VE->getVariableValue());
+ } else if (isa<DeadExpression>(E)) {
+ EClass = TOPClass;
+ }
+ if (!EClass) {
+ auto lookupResult = ExpressionToClass.insert_as({E, nullptr}, HE);
// If it's not in the value table, create a new congruence class.
if (lookupResult.second) {
@@ -2098,10 +2241,13 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
if (ClassChanged || LeaderChanged) {
DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E
<< "\n");
- if (ClassChanged)
+ if (ClassChanged) {
moveValueToNewCongruenceClass(I, E, IClass, EClass);
+ markPhiOfOpsChanged(HE);
+ }
+
markUsersTouched(I);
- if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
+ if (MemoryAccess *MA = getMemoryAccess(I))
markMemoryUsersTouched(MA);
if (auto *CI = dyn_cast<CmpInst>(I))
markPredicateUsersTouched(CI);
@@ -2110,11 +2256,11 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
// old store expression. In particular, loads do not compare against stored
// value, so they will find old store expressions (and associated class
// mappings) if we leave them in the table.
- if (ClassChanged && isa<StoreExpression>(E)) {
+ if (ClassChanged && isa<StoreInst>(I)) {
auto *OldE = ValueToExpression.lookup(I);
// It could just be that the old class died. We don't want to erase it if we
// just moved classes.
- if (OldE && isa<StoreExpression>(OldE) && !OldE->equals(*E))
+ if (OldE && isa<StoreExpression>(OldE) && *E != *OldE)
ExpressionToClass.erase(OldE);
}
ValueToExpression[I] = E;
@@ -2139,7 +2285,7 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
// impact predicates. Otherwise, only mark the phi nodes as touched, as
// they are the only thing that depend on new edges. Anything using their
// values will get propagated to if necessary.
- if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To))
+ if (MemoryAccess *MemPhi = getMemoryAccess(To))
TouchedInstructions.set(InstrToDFSNum(MemPhi));
auto BI = To->begin();
@@ -2147,6 +2293,9 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
TouchedInstructions.set(InstrToDFSNum(&*BI));
++BI;
}
+ for_each_found(PHIOfOpsPHIs, To, [&](const PHINode *I) {
+ TouchedInstructions.set(InstrToDFSNum(I));
+ });
}
}
}
@@ -2236,7 +2385,7 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
// This also may be a memory defining terminator, in which case, set it
// equivalent only to itself.
//
- auto *MA = MSSA->getMemoryAccess(TI);
+ auto *MA = getMemoryAccess(TI);
if (MA && !isa<MemoryUse>(MA)) {
auto *CC = ensureLeaderOfMemoryClass(MA);
if (setMemoryClass(MA, CC))
@@ -2245,6 +2394,158 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
}
}
+void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB,
+ Instruction *ExistingValue) {
+ InstrDFS[Op] = InstrToDFSNum(ExistingValue);
+ AllTempInstructions.insert(Op);
+ PHIOfOpsPHIs[BB].push_back(Op);
+ TempToBlock[Op] = BB;
+ if (ExistingValue)
+ RealToTemp[ExistingValue] = Op;
+}
+
+static bool okayForPHIOfOps(const Instruction *I) {
+ return isa<BinaryOperator>(I) || isa<SelectInst>(I) || isa<CmpInst>(I) ||
+ isa<LoadInst>(I);
+}
+
+// When we see an instruction that is an op of phis, generate the equivalent phi
+// of ops form.
+const Expression *
+NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
+ SmallPtrSetImpl<Value *> &Visited) {
+ if (!okayForPHIOfOps(I))
+ return nullptr;
+
+ if (!Visited.insert(I).second)
+ return nullptr;
+ // For now, we require the instruction be cycle free because we don't
+ // *always* create a phi of ops for instructions that could be done as phi
+ // of ops; we only do it if we think it is useful. If we did do it all the
+ // time, we could remove the cycle free check.
+ if (!isCycleFree(I))
+ return nullptr;
+
+ unsigned IDFSNum = InstrToDFSNum(I);
+ // Pretty much all of the instructions we can convert to phi of ops over a
+ // backedge that are adds, are really induction variables, and those are
+ // pretty much pointless to convert. This is very coarse-grained for a
+ // test, so if we do find some value, we can change it later.
+ // But otherwise, what can happen is we convert the induction variable from
+ //
+ // i = phi (0, tmp)
+ // tmp = i + 1
+ //
+ // to
+ // i = phi (0, tmpphi)
+ // tmpphi = phi(1, tmpphi+1)
+ //
+ // Which we don't want to happen. We could just avoid this for all non-cycle
+ // free phis, and we may go that route.
+ if (HasBackedge && I->getOpcode() == Instruction::Add)
+ return nullptr;
+
+ SmallPtrSet<const Value *, 8> ProcessedPHIs;
+ // TODO: We don't do phi translation on memory accesses because it's
+ // complicated. For a load, we'd need to be able to simulate a new memoryuse,
+ // which we don't have a good way of doing ATM.
+ auto *MemAccess = getMemoryAccess(I);
+ // If the memory operation is defined by a memory operation in this block that
+ // isn't a MemoryPhi, transforming the pointer backwards through a scalar phi
+ // can't help, as it would still be killed by that memory operation.
+ if (MemAccess && !isa<MemoryPhi>(MemAccess->getDefiningAccess()) &&
+ MemAccess->getDefiningAccess()->getBlock() == I->getParent())
+ return nullptr;
+
+ // Convert op of phis to phi of ops
+ for (auto &Op : I->operands()) {
+ if (!isa<PHINode>(Op))
+ continue;
+ auto *OpPHI = cast<PHINode>(Op);
+ // No point in doing this for one-operand phis.
+ if (OpPHI->getNumOperands() == 1)
+ continue;
+ if (!DebugCounter::shouldExecute(PHIOfOpsCounter))
+ return nullptr;
+ SmallVector<std::pair<Value *, BasicBlock *>, 4> Ops;
+ auto *PHIBlock = getBlockForValue(OpPHI);
+ for (auto PredBB : OpPHI->blocks()) {
+ Value *FoundVal = nullptr;
+ // We could just skip unreachable edges entirely but it's tricky to do
+ // with rewriting existing phi nodes.
+ if (ReachableEdges.count({PredBB, PHIBlock})) {
+ // Clone the instruction, create an expression from it, and see if we
+ // have a leader.
+ Instruction *ValueOp = I->clone();
+ auto Iter = TempToMemory.end();
+ if (MemAccess)
+ Iter = TempToMemory.insert({ValueOp, MemAccess}).first;
+
+ for (auto &Op : ValueOp->operands()) {
+ Op = Op->DoPHITranslation(PHIBlock, PredBB);
+ // When this operand changes, it could change whether there is a
+ // leader for us or not.
+ addAdditionalUsers(Op, I);
+ }
+ // Make sure it's marked as a temporary instruction.
+ AllTempInstructions.insert(ValueOp);
+ // and make sure anything that tries to add its DFS number is
+ // redirected to the instruction we are making a phi of ops
+ // for.
+ InstrDFS.insert({ValueOp, IDFSNum});
+ const Expression *E = performSymbolicEvaluation(ValueOp, Visited);
+ InstrDFS.erase(ValueOp);
+ AllTempInstructions.erase(ValueOp);
+ ValueOp->deleteValue();
+ if (MemAccess)
+ TempToMemory.erase(Iter);
+ if (!E)
+ return nullptr;
+ FoundVal = findPhiOfOpsLeader(E, PredBB);
+ if (!FoundVal) {
+ ExpressionToPhiOfOps[E].insert(I);
+ return nullptr;
+ }
+ if (auto *SI = dyn_cast<StoreInst>(FoundVal))
+ FoundVal = SI->getValueOperand();
+ } else {
+ DEBUG(dbgs() << "Skipping phi of ops operand for incoming block "
+ << getBlockName(PredBB)
+ << " because the block is unreachable\n");
+ FoundVal = UndefValue::get(I->getType());
+ }
+
+ Ops.push_back({FoundVal, PredBB});
+ DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in "
+ << getBlockName(PredBB) << "\n");
+ }
+ auto *ValuePHI = RealToTemp.lookup(I);
+ bool NewPHI = false;
+ if (!ValuePHI) {
+ ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands());
+ addPhiOfOps(ValuePHI, PHIBlock, I);
+ NewPHI = true;
+ NumGVNPHIOfOpsCreated++;
+ }
+ if (NewPHI) {
+ for (auto PHIOp : Ops)
+ ValuePHI->addIncoming(PHIOp.first, PHIOp.second);
+ } else {
+ unsigned int i = 0;
+ for (auto PHIOp : Ops) {
+ ValuePHI->setIncomingValue(i, PHIOp.first);
+ ValuePHI->setIncomingBlock(i, PHIOp.second);
+ ++i;
+ }
+ }
+
+ DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I
+ << "\n");
+ return performSymbolicEvaluation(ValuePHI, Visited);
+ }
+ return nullptr;
+}
+
// The algorithm initially places the values of the routine in the TOP
// congruence class. The leader of TOP is the undetermined value `undef`.
// When the algorithm has finished, values still in TOP are unreachable.
@@ -2287,6 +2588,12 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
TOPClass->incStoreCount();
}
for (auto &I : *BB) {
+ // TODO: Move to helper
+ if (isa<PHINode>(&I))
+ for (auto *U : I.users())
+ if (auto *UInst = dyn_cast<Instruction>(U))
+ if (InstrToDFSNum(UInst) != 0 && okayForPHIOfOps(UInst))
+ PHINodeUses.insert(UInst);
// Don't insert void terminators into the class. We don't value number
// them, and they just end up sitting in TOP.
if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
@@ -2311,12 +2618,35 @@ void NewGVN::cleanupTables() {
CongruenceClasses[i] = nullptr;
}
+ // Destroy the value expressions
+ SmallVector<Instruction *, 8> TempInst(AllTempInstructions.begin(),
+ AllTempInstructions.end());
+ AllTempInstructions.clear();
+
+ // We have to drop all references for everything first, so there are no uses
+ // left as we delete them.
+ for (auto *I : TempInst) {
+ I->dropAllReferences();
+ }
+
+ while (!TempInst.empty()) {
+ auto *I = TempInst.back();
+ TempInst.pop_back();
+ I->deleteValue();
+ }
+
ValueToClass.clear();
ArgRecycler.clear(ExpressionAllocator);
ExpressionAllocator.Reset();
CongruenceClasses.clear();
ExpressionToClass.clear();
ValueToExpression.clear();
+ RealToTemp.clear();
+ AdditionalUsers.clear();
+ ExpressionToPhiOfOps.clear();
+ TempToBlock.clear();
+ TempToMemory.clear();
+ PHIOfOpsPHIs.clear();
ReachableBlocks.clear();
ReachableEdges.clear();
#ifndef NDEBUG
@@ -2332,14 +2662,17 @@ void NewGVN::cleanupTables() {
MemoryToUsers.clear();
}
+// Assign local DFS number mapping to instructions, and leave space for Value
+// PHIs.
std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
unsigned Start) {
unsigned End = Start;
- if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) {
+ if (MemoryAccess *MemPhi = getMemoryAccess(B)) {
InstrDFS[MemPhi] = End++;
DFSToInstr.emplace_back(MemPhi);
}
+ // Then the real block goes next.
for (auto &I : *B) {
// There's no need to call isInstructionTriviallyDead more than once on
// an instruction. Therefore, once we know that an instruction is dead
@@ -2350,7 +2683,6 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
markInstructionForDeletion(&I);
continue;
}
-
InstrDFS[&I] = End++;
DFSToInstr.emplace_back(&I);
}
@@ -2361,7 +2693,7 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
return std::make_pair(Start, End);
}
-void NewGVN::updateProcessedCount(Value *V) {
+void NewGVN::updateProcessedCount(const Value *V) {
#ifndef NDEBUG
if (ProcessedCount.count(V) == 0) {
ProcessedCount.insert({V, 1});
@@ -2375,12 +2707,13 @@ void NewGVN::updateProcessedCount(Value *V) {
// Evaluate MemoryPhi nodes symbolically, just like PHI nodes
void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
// If all the arguments are the same, the MemoryPhi has the same value as the
- // argument.
- // Filter out unreachable blocks and self phis from our operands.
+ // argument. Filter out unreachable blocks and self phis from our operands.
+ // TODO: We could do cycle-checking on the memory phis to allow valueizing for
+ // self-phi checking.
const BasicBlock *PHIBlock = MP->getBlock();
auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) {
- return lookupMemoryLeader(cast<MemoryAccess>(U)) != MP &&
- !isMemoryAccessTop(cast<MemoryAccess>(U)) &&
+ return cast<MemoryAccess>(U) != MP &&
+ !isMemoryAccessTOP(cast<MemoryAccess>(U)) &&
ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock});
});
// If all that is left is nothing, our memoryphi is undef. We keep it as
@@ -2433,18 +2766,26 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
DEBUG(dbgs() << "Processing instruction " << *I << "\n");
if (!I->isTerminator()) {
const Expression *Symbolized = nullptr;
+ SmallPtrSet<Value *, 2> Visited;
if (DebugCounter::shouldExecute(VNCounter)) {
- Symbolized = performSymbolicEvaluation(I);
+ Symbolized = performSymbolicEvaluation(I, Visited);
+ // Make a phi of ops if necessary
+ if (Symbolized && !isa<ConstantExpression>(Symbolized) &&
+ !isa<VariableExpression>(Symbolized) && PHINodeUses.count(I)) {
+ // FIXME: Backedge argument
+ auto *PHIE = makePossiblePhiOfOps(I, false, Visited);
+ if (PHIE)
+ Symbolized = PHIE;
+ }
+
} else {
// Mark the instruction as unused so we don't value number it again.
InstrDFS[I] = 0;
}
// If we couldn't come up with a symbolic expression, use the unknown
// expression
- if (Symbolized == nullptr) {
+ if (Symbolized == nullptr)
Symbolized = createUnknownExpression(I);
- }
-
performCongruenceFinding(I, Symbolized);
} else {
// Handle terminators that return values. All of them produce values we
@@ -2460,13 +2801,23 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
// Check if there is a path, using single or equal argument phi nodes, from
// First to Second.
-bool NewGVN::singleReachablePHIPath(const MemoryAccess *First,
- const MemoryAccess *Second) const {
+bool NewGVN::singleReachablePHIPath(
+ SmallPtrSet<const MemoryAccess *, 8> &Visited, const MemoryAccess *First,
+ const MemoryAccess *Second) const {
if (First == Second)
return true;
if (MSSA->isLiveOnEntryDef(First))
return false;
+ // This is not perfect, but as we're just verifying here, we can live with
+ // the loss of precision. The real solution would be to do strongly
+ // connected component finding in this routine, and it's probably not worth
+ // the complexity for the time being. So, we just keep a set of visited
+ // MemoryAccesses and return true when we hit a cycle.
+ if (Visited.count(First))
+ return true;
+ Visited.insert(First);
+
const auto *EndDef = First;
for (auto *ChainDef : optimized_def_chain(First)) {
if (ChainDef == Second)
@@ -2489,7 +2840,8 @@ bool NewGVN::singleReachablePHIPath(const MemoryAccess *First,
Okay =
std::equal(OperandList.begin(), OperandList.end(), OperandList.begin());
if (Okay)
- return singleReachablePHIPath(cast<MemoryAccess>(OperandList[0]), Second);
+ return singleReachablePHIPath(Visited, cast<MemoryAccess>(OperandList[0]),
+ Second);
return false;
}
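
The Visited set threaded through singleReachablePHIPath above trades precision for guaranteed termination on cyclic MemoryAccess chains. A minimal standalone sketch of that pattern on a toy def chain; reachable and its graph encoding are hypothetical:

#include <cstdio>
#include <map>
#include <set>
#include <vector>

using Chain = std::map<int, std::vector<int>>;

// Recursive walk over a possibly-cyclic chain. On revisiting a node we
// conservatively answer "reachable" instead of recursing forever, just
// like the verifier's cycle cutoff.
bool reachable(const Chain &C, std::set<int> &Visited, int From, int To) {
  if (From == To)
    return true;
  if (!Visited.insert(From).second)
    return true; // hit a cycle: give up precision
  auto It = C.find(From);
  if (It == C.end())
    return false;
  for (int Next : It->second)
    if (reachable(C, Visited, Next, To))
      return true;
  return false;
}

int main() {
  Chain C{{1, {2}}, {2, {1}}}; // 1 <-> 2 cycle, 3 unreachable
  std::set<int> Visited;
  std::printf("%d\n", (int)reachable(C, Visited, 1, 3)); // prints 1
  return 0;
}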
@@ -2552,17 +2904,17 @@ void NewGVN::verifyMemoryCongruency() const {
auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred);
for (auto KV : Filtered) {
- assert(KV.second != TOPClass &&
- "Memory not unreachable but ended up in TOP");
if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader());
- if (FirstMUD && SecondMUD)
- assert((singleReachablePHIPath(FirstMUD, SecondMUD) ||
+ if (FirstMUD && SecondMUD) {
+ SmallPtrSet<const MemoryAccess *, 8> VisitedMAS;
+ assert((singleReachablePHIPath(VisitedMAS, FirstMUD, SecondMUD) ||
ValueToClass.lookup(FirstMUD->getMemoryInst()) ==
ValueToClass.lookup(SecondMUD->getMemoryInst())) &&
"The instructions for these memory operations should have "
"been in the same congruence class or reachable through"
"a single argument phi");
+ }
} else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) {
// We can only sanely verify that MemoryDefs in the operand list all have
// the same class.
@@ -2671,7 +3023,7 @@ void NewGVN::iterateTouchedInstructions() {
// Nothing set, nothing to iterate, just return.
if (FirstInstr == -1)
return;
- BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
+ const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
while (TouchedInstructions.any()) {
++Iterations;
// Walk through all the instructions in all the blocks in RPO.
@@ -2688,7 +3040,7 @@ void NewGVN::iterateTouchedInstructions() {
}
Value *V = InstrFromDFSNum(InstrNum);
- BasicBlock *CurrBlock = getBlockForValue(V);
+ const BasicBlock *CurrBlock = getBlockForValue(V);
// If we hit a new block, do reachability processing.
if (CurrBlock != LastBlock) {
@@ -2731,6 +3083,7 @@ bool NewGVN::runGVN() {
bool Changed = false;
NumFuncArgs = F.arg_size();
MSSAWalker = MSSA->getWalker();
+ SingletonDeadExpression = new (ExpressionAllocator) DeadExpression();
// Count number of instructions for sizing of hash tables, and come
// up with a global dfs numbering for instructions.
@@ -2769,6 +3122,7 @@ bool NewGVN::runGVN() {
BlockInstRange.insert({B, BlockRange});
ICount += BlockRange.second - BlockRange.first;
}
+ initializeCongruenceClasses(F);
TouchedInstructions.resize(ICount);
// Ensure we don't end up resizing the expressionToClass map, as
@@ -2779,9 +3133,10 @@ bool NewGVN::runGVN() {
// Initialize the touched instructions to include the entry block.
const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock());
TouchedInstructions.set(InstRange.first, InstRange.second);
+ DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock())
+ << " marked reachable\n");
ReachableBlocks.insert(&F.getEntryBlock());
- initializeCongruenceClasses(F);
iterateTouchedInstructions();
verifyMemoryCongruency();
verifyIterationSettled(F);
@@ -2794,7 +3149,8 @@ bool NewGVN::runGVN() {
if (!ToErase->use_empty())
ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType()));
- ToErase->eraseFromParent();
+ if (ToErase->getParent())
+ ToErase->eraseFromParent();
}
// Delete all unreachable blocks.
@@ -2813,14 +3169,6 @@ bool NewGVN::runGVN() {
return Changed;
}
-// Return true if V is a value that will always be available (IE can
-// be placed anywhere) in the function. We don't do globals here
-// because they are often worse to put in place.
-// TODO: Separate cost from availability
-static bool alwaysAvailable(Value *V) {
- return isa<Constant>(V) || isa<Argument>(V);
-}
-
struct NewGVN::ValueDFS {
int DFSIn = 0;
int DFSOut = 0;
@@ -2910,9 +3258,21 @@ void NewGVN::convertClassToDFSOrdered(
}
assert(isa<Instruction>(D) &&
"The dense set member should always be an instruction");
- VDDef.LocalNum = InstrToDFSNum(D);
- DFSOrderedSet.emplace_back(VDDef);
Instruction *Def = cast<Instruction>(D);
+ VDDef.LocalNum = InstrToDFSNum(D);
+ DFSOrderedSet.push_back(VDDef);
+      // If there is a phi node equivalent, add it.
+ if (auto *PN = RealToTemp.lookup(Def)) {
+ auto *PHIE =
+ dyn_cast_or_null<PHIExpression>(ValueToExpression.lookup(Def));
+ if (PHIE) {
+ VDDef.Def.setInt(false);
+ VDDef.Def.setPointer(PN);
+ VDDef.LocalNum = 0;
+ DFSOrderedSet.push_back(VDDef);
+ }
+ }
+
unsigned int UseCount = 0;
// Now add the uses.
for (auto &U : Def->uses()) {
@@ -2929,7 +3289,7 @@ void NewGVN::convertClassToDFSOrdered(
// they are from.
VDUse.LocalNum = InstrDFS.size() + 1;
} else {
- IBlock = I->getParent();
+ IBlock = getBlockForValue(I);
VDUse.LocalNum = InstrToDFSNum(I);
}
@@ -3099,6 +3459,37 @@ private:
};
}
+// Given an expression and a basic block where we need a value to be
+// available, see if the expression's congruence class has a leader
+// available in that block.
+Value *NewGVN::findPhiOfOpsLeader(const Expression *E,
+ const BasicBlock *BB) const {
+  // If we could have made this expression a constant, it would already be
+  // one.
+ if (auto *CE = dyn_cast<ConstantExpression>(E))
+ return CE->getConstantValue();
+ if (auto *VE = dyn_cast<VariableExpression>(E))
+ return VE->getVariableValue();
+
+ auto *CC = ExpressionToClass.lookup(E);
+ if (!CC)
+ return nullptr;
+ if (alwaysAvailable(CC->getLeader()))
+ return CC->getLeader();
+
+ for (auto Member : *CC) {
+ auto *MemberInst = dyn_cast<Instruction>(Member);
+ // Anything that isn't an instruction is always available.
+ if (!MemberInst)
+ return Member;
+    // The member is usable if it is defined in BB itself or in a block that
+    // dominates BB. A same-block definition suffices because we are finding
+    // operands for a phi node, and those only need to be available at the
+    // end of the incoming block.
+ if (MemberInst->getParent() == BB ||
+ DT->dominates(MemberInst->getParent(), BB)) {
+ return Member;
+ }
+ }
+ return nullptr;
+}
+
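
findPhiOfOpsLeader scans a congruence class for a member whose definition is available where the phi operand is needed. The shape of that availability check, sketched with hypothetical Block and Member types and a naive idom-chain dominance test (LLVM's DominatorTree answers this in O(1) after DFS numbering):

    #include <vector>

    // A block with an immediate-dominator link; null for the entry block.
    struct Block {
      const Block *IDom = nullptr;
    };

    // A dominates B if walking B's idom chain reaches A (every block
    // dominates itself).
    static bool dominates(const Block *A, const Block *B) {
      for (const Block *X = B; X; X = X->IDom)
        if (X == A)
          return true;
      return false;
    }

    // A class member: constants and arguments carry no defining block and
    // are available everywhere.
    struct Member {
      const Block *DefBlock = nullptr;
    };

    // Return the first member usable as the phi operand flowing in from BB.
    // The same-block test is redundant with dominates() here but mirrors
    // the original's explicit check.
    static const Member *findAvailableLeader(const std::vector<Member> &Class,
                                             const Block *BB) {
      for (const Member &M : Class) {
        if (!M.DefBlock)
          return &M;
        if (M.DefBlock == BB || dominates(M.DefBlock, BB))
          return &M;
      }
      return nullptr;
    }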
bool NewGVN::eliminateInstructions(Function &F) {
// This is a non-standard eliminator. The normal way to eliminate is
// to walk the dominator tree in order, keeping track of available
@@ -3129,25 +3520,42 @@ bool NewGVN::eliminateInstructions(Function &F) {
// DFS numbers are updated, we compute some ourselves.
DT->updateDFSNumbers();
- for (auto &B : F) {
- if (!ReachableBlocks.count(&B)) {
- for (const auto S : successors(&B)) {
- for (auto II = S->begin(); isa<PHINode>(II); ++II) {
- auto &Phi = cast<PHINode>(*II);
- DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block "
- << getBlockName(&B)
- << " with undef due to it being unreachable\n");
- for (auto &Operand : Phi.incoming_values())
- if (Phi.getIncomingBlock(Operand) == &B)
- Operand.set(UndefValue::get(Phi.getType()));
- }
+ // Go through all of our phi nodes, and kill the arguments associated with
+ // unreachable edges.
+ auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) {
+ for (auto &Operand : PHI.incoming_values())
+ if (!ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})) {
+ DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block "
+ << getBlockName(PHI.getIncomingBlock(Operand))
+ << " with undef due to it being unreachable\n");
+ Operand.set(UndefValue::get(PHI.getType()));
}
+ };
+ SmallPtrSet<BasicBlock *, 8> BlocksWithPhis;
+ for (auto &B : F)
+ if ((!B.empty() && isa<PHINode>(*B.begin())) ||
+ (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end()))
+ BlocksWithPhis.insert(&B);
+ DenseMap<const BasicBlock *, unsigned> ReachablePredCount;
+ for (auto KV : ReachableEdges)
+ ReachablePredCount[KV.getEnd()]++;
+ for (auto *BB : BlocksWithPhis)
+  // TODO: It would be faster to use getNumIncomingValues() on a phi node in
+  // the block and subtract the pred count, but it's more complicated.
+ if (ReachablePredCount.lookup(BB) !=
+ std::distance(pred_begin(BB), pred_end(BB))) {
+ for (auto II = BB->begin(); isa<PHINode>(II); ++II) {
+ auto &PHI = cast<PHINode>(*II);
+ ReplaceUnreachablePHIArgs(PHI, BB);
+ }
+ for_each_found(PHIOfOpsPHIs, BB, [&](PHINode *PHI) {
+ ReplaceUnreachablePHIArgs(*PHI, BB);
+ });
}
- }
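
Rather than visiting every block, the rewritten loop only fixes up phis in blocks whose reachable-predecessor count disagrees with their total predecessor count. The counting idea in isolation, over a hypothetical integer edge list:

    #include <map>
    #include <set>
    #include <utility>
    #include <vector>

    using BlockID = int;
    using Edge = std::pair<BlockID, BlockID>; // (pred, succ)

    // Returns the successors with at least one unreachable incoming edge,
    // i.e. the only blocks whose phis need arguments replaced with undef.
    static std::set<BlockID>
    blocksNeedingPhiFixup(const std::set<Edge> &ReachableEdges,
                          const std::vector<Edge> &AllEdges) {
      std::map<BlockID, unsigned> ReachablePreds, TotalPreds;
      for (const Edge &E : ReachableEdges)
        ReachablePreds[E.second]++;
      for (const Edge &E : AllEdges)
        TotalPreds[E.second]++;
      std::set<BlockID> NeedsFixup;
      for (const auto &KV : TotalPreds)
        if (ReachablePreds[KV.first] != KV.second)
          NeedsFixup.insert(KV.first);
      return NeedsFixup;
    }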
// Map to store the use counts
DenseMap<const Value *, unsigned int> UseCounts;
- for (CongruenceClass *CC : reverse(CongruenceClasses)) {
+ for (auto *CC : reverse(CongruenceClasses)) {
// Track the equivalent store info so we can decide whether to try
// dead store elimination.
SmallVector<ValueDFS, 8> PossibleDeadStores;
@@ -3156,13 +3564,15 @@ bool NewGVN::eliminateInstructions(Function &F) {
continue;
// Everything still in the TOP class is unreachable or dead.
if (CC == TOPClass) {
-#ifndef NDEBUG
- for (auto M : *CC)
+ for (auto M : *CC) {
+ auto *VTE = ValueToExpression.lookup(M);
+ if (VTE && isa<DeadExpression>(VTE))
+ markInstructionForDeletion(cast<Instruction>(M));
assert((!ReachableBlocks.count(cast<Instruction>(M)->getParent()) ||
InstructionsToErase.count(cast<Instruction>(M))) &&
"Everything in TOP should be unreachable or dead at this "
"point");
-#endif
+ }
continue;
}
@@ -3195,7 +3605,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID()
<< "\n");
// If this is a singleton, we can skip it.
- if (CC->size() != 1) {
+ if (CC->size() != 1 || RealToTemp.lookup(Leader)) {
// This is a stack because equality replacement/etc may place
// constants in the middle of the member list, and we want to use
// those constant values in preference to the current leader, over
@@ -3217,6 +3627,22 @@ bool NewGVN::eliminateInstructions(Function &F) {
// We ignore void things because we can't get a value from them.
if (Def && Def->getType()->isVoidTy())
continue;
+ auto *DefInst = dyn_cast_or_null<Instruction>(Def);
+ if (DefInst && AllTempInstructions.count(DefInst)) {
+ auto *PN = cast<PHINode>(DefInst);
+
+          // If this is a value phi and that's the expression we used, insert
+          // it into the program and remove it from the temp instruction list.
+ AllTempInstructions.erase(PN);
+ auto *DefBlock = getBlockForValue(Def);
+          DEBUG(dbgs() << "Inserting fully real phi of ops " << *Def
+                       << " into block " << getBlockName(DefBlock) << "\n");
+ PN->insertBefore(&DefBlock->front());
+ Def = PN;
+ NumGVNPHIOfOpsEliminations++;
+ }
if (EliminationStack.empty()) {
DEBUG(dbgs() << "Elimination Stack is empty\n");
@@ -3301,6 +3727,10 @@ bool NewGVN::eliminateInstructions(Function &F) {
Value *DominatingLeader = EliminationStack.back();
+ auto *II = dyn_cast<IntrinsicInst>(DominatingLeader);
+ if (II && II->getIntrinsicID() == Intrinsic::ssa_copy)
+ DominatingLeader = II->getOperand(0);
+
// Don't replace our existing users with ourselves.
if (U->get() == DominatingLeader)
continue;
@@ -3321,6 +3751,8 @@ bool NewGVN::eliminateInstructions(Function &F) {
// It's about to be alive again.
if (LeaderUseCount == 0 && isa<Instruction>(DominatingLeader))
ProbablyDead.erase(cast<Instruction>(DominatingLeader));
+ if (LeaderUseCount == 0 && II)
+ ProbablyDead.insert(II);
++LeaderUseCount;
AnythingReplaced = true;
}
@@ -3375,7 +3807,6 @@ bool NewGVN::eliminateInstructions(Function &F) {
}
}
}
-
return AnythingReplaced;
}
@@ -3385,19 +3816,23 @@ bool NewGVN::eliminateInstructions(Function &F) {
// we will simplify an operation with all constants so that it doesn't matter
// what order they appear in.
unsigned int NewGVN::getRank(const Value *V) const {
- // Prefer undef to anything else
+  // Prefer plain constants to undef, and undef to anything else.
+  // UndefValue is a Constant, so it has to be checked before the generic
+  // Constant case. Constant expressions are ranked after simple constants.
+ if (isa<ConstantExpr>(V))
+ return 2;
if (isa<UndefValue>(V))
- return 0;
- if (isa<Constant>(V))
return 1;
+ if (isa<Constant>(V))
+ return 0;
else if (auto *A = dyn_cast<Argument>(V))
- return 2 + A->getArgNo();
+ return 3 + A->getArgNo();
  // Need to shift the instruction DFS by number of arguments + 4 to account
  // for the constant and argument ranking above.
unsigned Result = InstrToDFSNum(V);
if (Result > 0)
- return 3 + NumFuncArgs + Result;
+ return 4 + NumFuncArgs + Result;
// Unreachable or something else, just return a really large number.
return ~0;
}
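
The new ranking orders values as: plain constants, then undef, then constant expressions, then arguments by position, then instructions by DFS number, so commutative operands can be put into a deterministic canonical order. A toy version over a tagged value kind (the direction of the final swap is illustrative, not necessarily the one NewGVN picks):

    #include <algorithm>
    #include <cstdint>

    enum class Kind { Constant, Undef, ConstantExpr, Argument, Instruction };

    struct Val {
      Kind K;
      unsigned Num = 0; // argument index or instruction DFS number
    };

    static uint64_t rank(const Val &V, unsigned NumFuncArgs) {
      switch (V.K) {
      case Kind::Constant:     return 0;
      case Kind::Undef:        return 1;
      case Kind::ConstantExpr: return 2;
      case Kind::Argument:     return 3 + V.Num;
      case Kind::Instruction:  return 4 + NumFuncArgs + V.Num;
      }
      return ~uint64_t(0); // unreachable or unknown: rank last
    }

    // Canonicalize a commutative pair by rank so equivalent expressions
    // hash identically regardless of the order operands appeared in.
    static void canonicalize(Val &A, Val &B, unsigned NumFuncArgs) {
      if (rank(A, NumFuncArgs) > rank(B, NumFuncArgs))
        std::swap(A, B);
    }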
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 53320bff0883..a20890b22603 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1582,7 +1582,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
}
// No need for extra uses anymore.
- delete DummyInst;
+ DummyInst->deleteValue();
unsigned NumAddedValues = NewMulOps.size();
Value *V = EmitAddTreeOfValues(I, NewMulOps);
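
This hunk and the ones that follow replace direct delete of detached instructions with Value::deleteValue(). As I read the Value API of this vintage, Value's destructor is non-virtual and protected, and deleteValue() dispatches on the value ID to run the right subclass destructor, so destruction through a base pointer stays well defined without a vtable. The general pattern, sketched with a hypothetical two-class hierarchy:

    // A base class without a virtual destructor, mirroring the idea behind
    // llvm::Value. `delete basePtr` would be ill-formed or UB, so a manual
    // dispatch on a type ID is used instead.
    class Base {
    public:
      enum ID { InstructionID, ConstantID };
      explicit Base(ID Id) : ValID(Id) {}
      ID getID() const { return ValID; }
      void deleteValue(); // defined after the subclasses

    protected:
      ~Base() = default; // non-virtual and protected: no `delete Base*`

    private:
      ID ValID;
    };

    class Instruction : public Base {
    public:
      Instruction() : Base(InstructionID) {}
    };

    class Constant : public Base {
    public:
      Constant() : Base(ConstantID) {}
    };

    void Base::deleteValue() {
      switch (getID()) {
      case InstructionID: delete static_cast<Instruction *>(this); break;
      case ConstantID:    delete static_cast<Constant *>(this);    break;
      }
    }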
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 1d9beffaf06b..24bd0a2b7bdf 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2443,7 +2443,7 @@ private:
"insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
- delete Placeholder;
+ Placeholder->deleteValue();
} else {
LI.replaceAllUsesWith(V);
}
@@ -3898,8 +3898,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
}
NumAllocaPartitionUses += NumUses;
- MaxUsesPerAllocaPartition =
- std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
+ MaxUsesPerAllocaPartition.updateMax(NumUses);
// Now that we've processed all the slices in the new partition, check if any
// PHIs or Selects would block promotion.
@@ -4016,8 +4015,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
NumAllocaPartitions += NumPartitions;
- MaxPartitionsPerAlloca =
- std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
+ MaxPartitionsPerAlloca.updateMax(NumPartitions);
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
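
MaxUsesPerAllocaPartition.updateMax(NumUses) replaces the read-modify-write std::max pattern. llvm::Statistic implements the update with a compare-exchange loop, so concurrent updaters cannot lose a larger value. The core loop, assuming a plain std::atomic counter:

    #include <atomic>

    // Monotonically raise Max to at least V, safely under concurrency.
    // compare_exchange_weak reloads PrevMax on failure, so the loop exits
    // as soon as V is no longer larger than the stored maximum.
    static void updateMax(std::atomic<unsigned> &Max, unsigned V) {
      unsigned PrevMax = Max.load(std::memory_order_relaxed);
      while (V > PrevMax)
        if (Max.compare_exchange_weak(PrevMax, V, std::memory_order_relaxed))
          break;
    }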
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 2be3f5c533b9..8b8d6590aa6a 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -693,7 +693,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
UnlinkedInst->setOperand(I, nullptr);
RecursivelyDeleteTriviallyDeadInstructions(Op);
}
- delete UnlinkedInst;
+ UnlinkedInst->deleteValue();
}
bool Ret = !UnlinkedInstructions.empty();
UnlinkedInstructions.clear();
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 4aa26fd14fee..bf2ab7c55be2 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -317,7 +317,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (!NewInst->mayHaveSideEffects()) {
VMap[&*II] = V;
- delete NewInst;
+ NewInst->deleteValue();
continue;
}
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index b44bc74d6551..27f72fcd8bda 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2235,7 +2235,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
if (!BBI->use_empty())
TranslateMap[&*BBI] = V;
if (!N->mayHaveSideEffects()) {
- delete N; // Instruction folded away, don't need actual inst
+ N->deleteValue(); // Instruction folded away, don't need actual inst
N = nullptr;
}
} else {
@@ -4380,7 +4380,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
// Gather dead cases.
SmallVector<ConstantInt *, 8> DeadCases;
for (auto &Case : SI->cases()) {
- APInt CaseVal = Case.getCaseValue()->getValue();
+ const APInt &CaseVal = Case.getCaseValue()->getValue();
if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
(CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
DeadCases.push_back(Case.getCaseValue());
@@ -4946,7 +4946,7 @@ SwitchLookupTable::SwitchLookupTable(
LinearMappingPossible = false;
break;
}
- APInt Val = ConstVal->getValue();
+ const APInt &Val = ConstVal->getValue();
if (I != 0) {
APInt Dist = Val - PrevVal;
if (I == 1) {
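
Both SimplifyCFG hunks bind getValue() results to const APInt & instead of copying. APInt owns heap storage once a value is wider than 64 bits, so each copy of a wide case value costs an allocation, while a reference borrow is free. A toy illustration with a stand-in big-integer type (BigInt and CaseStub are hypothetical):

    #include <cstdint>
    #include <vector>

    // Stand-in for APInt: wide values own heap storage, so copies allocate.
    struct BigInt {
      std::vector<uint64_t> Words;
    };

    struct CaseStub {
      BigInt Storage;
      const BigInt &getCaseValue() const { return Storage; }
    };

    static uint64_t sumLowWords(const std::vector<CaseStub> &Cases) {
      uint64_t Sum = 0;
      for (const CaseStub &C : Cases) {
        // BigInt Val = C.getCaseValue();   // would deep-copy the words
        const BigInt &Val = C.getCaseValue(); // borrows: no allocation
        if (!Val.Words.empty())
          Sum += Val.Words[0];
      }
      return Sum;
    }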
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 1de579ed41b0..85c9464b5569 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -426,57 +426,70 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
return Dst;
}
-Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
+Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
+ unsigned CharSize) {
Value *Src = CI->getArgOperand(0);
// Constant folding: strlen("xyz") -> 3
- if (uint64_t Len = GetStringLength(Src))
+ if (uint64_t Len = GetStringLength(Src, CharSize))
return ConstantInt::get(CI->getType(), Len - 1);
// If s is a constant pointer pointing to a string literal, we can fold
- // strlen(s + x) to strlen(s) - x, when x is known to be in the range
+ // strlen(s + x) to strlen(s) - x, when x is known to be in the range
// [0, strlen(s)] or the string has a single null terminator '\0' at the end.
- // We only try to simplify strlen when the pointer s points to an array
+ // We only try to simplify strlen when the pointer s points to an array
// of i8. Otherwise, we would need to scale the offset x before doing the
- // subtraction. This will make the optimization more complex, and it's not
- // very useful because calling strlen for a pointer of other types is
+ // subtraction. This will make the optimization more complex, and it's not
+ // very useful because calling strlen for a pointer of other types is
// very uncommon.
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
- if (!isGEPBasedOnPointerToString(GEP))
+ if (!isGEPBasedOnPointerToString(GEP, CharSize))
return nullptr;
- StringRef Str;
- if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) {
- size_t NullTermIdx = Str.find('\0');
-
- // If the string does not have '\0', leave it to strlen to compute
- // its length.
- if (NullTermIdx == StringRef::npos)
- return nullptr;
-
+ ConstantDataArraySlice Slice;
+ if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) {
+ uint64_t NullTermIdx;
+ if (Slice.Array == nullptr) {
+ NullTermIdx = 0;
+ } else {
+ NullTermIdx = ~((uint64_t)0);
+ for (uint64_t I = 0, E = Slice.Length; I < E; ++I) {
+ if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) {
+ NullTermIdx = I;
+ break;
+ }
+ }
+ // If the string does not have '\0', leave it to strlen to compute
+ // its length.
+ if (NullTermIdx == ~((uint64_t)0))
+ return nullptr;
+ }
+
Value *Offset = GEP->getOperand(2);
unsigned BitWidth = Offset->getType()->getIntegerBitWidth();
KnownBits Known(BitWidth);
computeKnownBits(Offset, Known, DL, 0, nullptr, CI, nullptr);
Known.Zero.flipAllBits();
- size_t ArrSize =
+ uint64_t ArrSize =
cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
- // KnownZero's bits are flipped, so zeros in KnownZero now represent
- // bits known to be zeros in Offset, and ones in KnowZero represent
+ // KnownZero's bits are flipped, so zeros in KnownZero now represent
+      // bits known to be zeros in Offset, and ones in KnownZero represent
// bits unknown in Offset. Therefore, Offset is known to be in range
- // [0, NullTermIdx] when the flipped KnownZero is non-negative and
+ // [0, NullTermIdx] when the flipped KnownZero is non-negative and
// unsigned-less-than NullTermIdx.
//
- // If Offset is not provably in the range [0, NullTermIdx], we can still
- // optimize if we can prove that the program has undefined behavior when
- // Offset is outside that range. That is the case when GEP->getOperand(0)
+ // If Offset is not provably in the range [0, NullTermIdx], we can still
+ // optimize if we can prove that the program has undefined behavior when
+ // Offset is outside that range. That is the case when GEP->getOperand(0)
// is a pointer to an object whose memory extent is NullTermIdx+1.
- if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
+ if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
(GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
- NullTermIdx == ArrSize - 1))
- return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
+ NullTermIdx == ArrSize - 1)) {
+ Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
+ return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
Offset);
+ }
}
return nullptr;
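
The flipped-KnownZero trick above derives an upper bound: after flipAllBits(), a set bit in Known.Zero marks a bit of Offset that may be one, so the flipped mask is the largest value Offset can take. When that maximum is non-negative and unsigned-less-or-equal to NullTermIdx, Offset is provably in [0, NullTermIdx] and the subtraction is safe. A numeric check of my reading of the bound, not LLVM's exact code:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // The largest value Offset can take is the complement of its
    // known-zero bits: every bit not known zero might be one.
    static uint64_t maxPossible(uint64_t KnownZero) { return ~KnownZero; }

    int main() {
      const char *S = "hello";
      uint64_t NullTermIdx = 5; // index of '\0' in S
      // Suppose analysis proved bits [63:2] of Offset are zero: Offset <= 3.
      uint64_t KnownZero = ~uint64_t(3);
      assert(maxPossible(KnownZero) <= NullTermIdx); // the transform's guard
      // Under that guard, strlen(S + Offset) folds to NullTermIdx - Offset.
      for (uint64_t Off = 0; Off <= maxPossible(KnownZero); ++Off)
        assert(std::strlen(S + Off) == NullTermIdx - Off);
      return 0;
    }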
@@ -484,8 +497,8 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
// strlen(x?"foo":"bars") --> x ? 3 : 4
if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
- uint64_t LenTrue = GetStringLength(SI->getTrueValue());
- uint64_t LenFalse = GetStringLength(SI->getFalseValue());
+ uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
+ uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
if (LenTrue && LenFalse) {
Function *Caller = CI->getParent()->getParent();
emitOptimizationRemark(CI->getContext(), "simplify-libcalls", *Caller,
@@ -505,6 +518,17 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
+Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
+ return optimizeStringLength(CI, B, 8);
+}
+
+Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
+ Module &M = *CI->getParent()->getParent()->getParent();
+ unsigned WCharSize = TLI->getWCharSize(M) * 8;
+
+ return optimizeStringLength(CI, B, WCharSize);
+}
+
Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
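
Generalizing optimizeStrLen into optimizeStringLength lets one routine serve both strlen (CharSize 8) and wcslen (CharSize taken from TargetLibraryInfo's per-module wchar_t width, times 8). The essential parameterization, sketched over a raw byte buffer with a hypothetical helper:

    #include <cstddef>
    #include <cstdint>

    // Locate the first all-zero element of width CharSize bits in Buf, the
    // way a constant string's terminator is found for strlen (CharSize 8)
    // or wcslen (CharSize == 8 * sizeof(wchar_t) for the target).
    static size_t findNullTerm(const uint8_t *Buf, size_t NumElems,
                               unsigned CharSize) {
      const size_t ElemBytes = CharSize / 8;
      for (size_t I = 0; I != NumElems; ++I) {
        const uint8_t *Elem = Buf + I * ElemBytes;
        bool AllZero = true;
        for (size_t B = 0; B != ElemBytes; ++B)
          if (Elem[B] != 0) {
            AllZero = false;
            break;
          }
        if (AllZero)
          return I;
      }
      return NumElems; // no terminator in the constant data
    }

For example, scanning the bytes of a two-byte-per-element literal like u"ab" (three elements including the terminator) with CharSize 16 returns index 2, regardless of byte order, since only the terminator element is all zero.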
@@ -2026,6 +2050,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeMemMove(CI, Builder);
case LibFunc_memset:
return optimizeMemSet(CI, Builder);
+ case LibFunc_wcslen:
+ return optimizeWcslen(CI, Builder);
default:
break;
}
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 516ab7d03a88..1dc554bede7e 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3047,7 +3047,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
if (CreateGatherScatter) {
Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr;
NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, MaskPart,
- 0, "wide.masked.gather");
+ nullptr, "wide.masked.gather");
Entry[Part] = NewLI;
} else {
// Calculate the pointer for the specific unroll-part.
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 64013d6d687d..e6f78e6b94a3 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -578,12 +578,12 @@ private:
void eraseInstruction(Instruction *I) {
I->removeFromParent();
I->dropAllReferences();
- DeletedInstructions.push_back(std::unique_ptr<Instruction>(I));
+ DeletedInstructions.emplace_back(I);
}
  /// Temporary store for deleted instructions. Instructions will be deleted
  /// eventually when the BoUpSLP is destroyed.
- SmallVector<std::unique_ptr<Instruction>, 8> DeletedInstructions;
+ SmallVector<unique_value, 8> DeletedInstructions;
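
DeletedInstructions switches from std::unique_ptr<Instruction> to unique_value, which in LLVM of this vintage pairs std::unique_ptr<Value, ...> with a deleter that calls deleteValue() rather than delete, matching the non-virtual-destructor pattern sketched earlier. A self-contained analogue:

    #include <memory>
    #include <vector>

    // Minimal analogue of llvm::Value: protected non-virtual destructor,
    // with destruction routed through deleteValue().
    struct Value {
      void deleteValue();
    protected:
      ~Value() = default;
    };

    struct Instruction : Value {};

    void Value::deleteValue() {
      // Real LLVM switches on getValueID(); one subclass suffices here.
      delete static_cast<Instruction *>(this);
    }

    // unique_value pairs unique_ptr with a deleter that calls deleteValue()
    // instead of delete, since `delete Value*` is unavailable.
    struct ValueDeleter {
      void operator()(Value *V) const { V->deleteValue(); }
    };
    using unique_value = std::unique_ptr<Value, ValueDeleter>;

    int main() {
      std::vector<unique_value> DeletedInstructions;
      DeletedInstructions.emplace_back(new Instruction());
      // Parked instructions stay allocated (and pointers to them valid)
      // until the vector itself is destroyed.
      return 0;
    }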
  /// A list of values that need to be extracted out of the tree.
/// This list holds pairs of (Internal Scalar : External User). External User