author     Dimitry Andric <dim@FreeBSD.org>    2017-07-19 07:02:10 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-07-19 07:02:10 +0000
commit     93c91e39b29142dec1d03a30df9f6e757f56c193 (patch)
tree       33a9b014a327e64450b3c9ed46d8c5bdb78ad345 /lib
parent     ca089b24d48ef6fa8da2d0bb8c25bb802c4a95c0 (diff)
Vendor import of llvm trunk r308421 (tag: vendor/llvm/llvm-trunk-r308421)
Notes:
    svn path=/vendor/llvm/dist/; revision=321184
    svn path=/vendor/llvm/llvm-trunk-r308421/; revision=321185; tag=vendor/llvm/llvm-trunk-r308421
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/CGSCCPassManager.cpp | 9
-rw-r--r--  lib/Analysis/DominanceFrontier.cpp | 3
-rw-r--r--  lib/Analysis/InstCount.cpp | 8
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 21
-rw-r--r--  lib/Analysis/IteratedDominanceFrontier.cpp | 8
-rw-r--r--  lib/Analysis/LazyCallGraph.cpp | 49
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 2
-rw-r--r--  lib/Analysis/MemorySSA.cpp | 1
-rw-r--r--  lib/Analysis/PostDominators.cpp | 2
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 385
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 5
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 2
-rw-r--r--  lib/AsmParser/LLParser.cpp | 10
-rw-r--r--  lib/AsmParser/LLToken.h | 2
-rw-r--r--  lib/Bitcode/Reader/MetadataLoader.cpp | 8
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 5
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 81
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 3
-rw-r--r--  lib/CodeGen/MachineCombiner.cpp | 4
-rw-r--r--  lib/CodeGen/MachineDominanceFrontier.cpp | 3
-rw-r--r--  lib/CodeGen/MachineDominators.cpp | 6
-rw-r--r--  lib/CodeGen/MachinePostDominators.cpp | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 161
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 21
-rw-r--r--  lib/CodeGen/XRayInstrumentation.cpp | 6
-rw-r--r--  lib/DebugInfo/CodeView/CVTypeVisitor.cpp | 77
-rw-r--r--  lib/DebugInfo/CodeView/CodeViewRecordIO.cpp | 9
-rw-r--r--  lib/DebugInfo/CodeView/Formatters.cpp | 7
-rw-r--r--  lib/DebugInfo/CodeView/SymbolDumper.cpp | 2
-rw-r--r--  lib/DebugInfo/CodeView/TypeDumpVisitor.cpp | 2
-rw-r--r--  lib/DebugInfo/CodeView/TypeStreamMerger.cpp | 173
-rw-r--r--  lib/DebugInfo/DWARF/DWARFVerifier.cpp | 195
-rw-r--r--  lib/DebugInfo/PDB/CMakeLists.txt | 1
-rw-r--r--  lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp | 12
-rw-r--r--  lib/DebugInfo/PDB/GenericError.cpp | 2
-rw-r--r--  lib/DebugInfo/PDB/Native/InfoStream.cpp | 2
-rw-r--r--  lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp | 2
-rw-r--r--  lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp | 126
-rw-r--r--  lib/DebugInfo/PDB/Native/TpiHashing.cpp | 128
-rw-r--r--  lib/DebugInfo/PDB/Native/TpiStream.cpp | 1
-rw-r--r--  lib/DebugInfo/PDB/PDBExtras.cpp | 6
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h | 1
-rw-r--r--  lib/Fuzzer/CMakeLists.txt | 1
-rw-r--r--  lib/Fuzzer/FuzzerCorpus.h | 40
-rw-r--r--  lib/Fuzzer/FuzzerDriver.cpp | 20
-rw-r--r--  lib/Fuzzer/FuzzerFlags.def | 3
-rw-r--r--  lib/Fuzzer/FuzzerInternal.h | 14
-rw-r--r--  lib/Fuzzer/FuzzerLoop.cpp | 30
-rw-r--r--  lib/Fuzzer/FuzzerMerge.cpp | 8
-rw-r--r--  lib/Fuzzer/FuzzerMutate.cpp | 23
-rw-r--r--  lib/Fuzzer/FuzzerMutate.h | 6
-rw-r--r--  lib/Fuzzer/FuzzerTracePC.cpp | 83
-rw-r--r--  lib/Fuzzer/FuzzerTracePC.h | 23
-rw-r--r--  lib/Fuzzer/FuzzerTraceState.cpp | 181
-rw-r--r--  lib/Fuzzer/FuzzerUtil.cpp | 7
-rw-r--r--  lib/Fuzzer/FuzzerUtil.h | 10
-rw-r--r--  lib/Fuzzer/afl/afl_driver.cpp | 4
-rw-r--r--  lib/Fuzzer/test/CMakeLists.txt | 3
-rw-r--r--  lib/Fuzzer/test/FlagsTest.cpp | 32
-rw-r--r--  lib/Fuzzer/test/FuzzerUnittest.cpp | 31
-rw-r--r--  lib/Fuzzer/test/fuzzer-flags.test | 17
-rw-r--r--  lib/Fuzzer/test/fuzzer-traces-hooks.test | 2
-rw-r--r--  lib/Fuzzer/test/reduce_inputs.test | 3
-rw-r--r--  lib/IR/AsmWriter.cpp | 3
-rw-r--r--  lib/IR/Constants.cpp | 86
-rw-r--r--  lib/IR/Core.cpp | 1
-rw-r--r--  lib/IR/DIBuilder.cpp | 26
-rw-r--r--  lib/IR/DebugInfoMetadata.cpp | 9
-rw-r--r--  lib/IR/Dominators.cpp | 42
-rw-r--r--  lib/IR/LLVMContextImpl.h | 16
-rw-r--r--  lib/IR/LegacyPassManager.cpp | 51
-rw-r--r--  lib/IR/Module.cpp | 2
-rw-r--r--  lib/Object/ArchiveWriter.cpp | 3
-rw-r--r--  lib/Object/COFFImportFile.cpp | 144
-rw-r--r--  lib/Object/COFFModuleDefinition.cpp | 36
-rw-r--r--  lib/Object/COFFObjectFile.cpp | 5
-rw-r--r--  lib/ObjectYAML/CodeViewYAMLTypes.cpp | 27
-rw-r--r--  lib/Option/OptTable.cpp | 25
-rw-r--r--  lib/Support/ErrorHandling.cpp | 22
-rw-r--r--  lib/Support/Host.cpp | 41
-rw-r--r--  lib/Support/Path.cpp | 2
-rw-r--r--  lib/Support/TargetParser.cpp | 2
-rw-r--r--  lib/Support/YAMLTraits.cpp | 8
-rw-r--r--  lib/Support/raw_ostream.cpp | 29
-rw-r--r--  lib/Target/AArch64/AArch64.h | 4
-rw-r--r--  lib/Target/AArch64/AArch64.td | 4
-rw-r--r--  lib/Target/AArch64/AArch64CallingConvention.td | 7
-rw-r--r--  lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64FalkorHWPFFix.cpp | 790
-rw-r--r--  lib/Target/AArch64/AArch64FastISel.cpp | 1
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp | 12
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 15
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 64
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h | 16
-rw-r--r--  lib/Target/AArch64/AArch64InstrAtomics.td | 10
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp | 13
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h | 12
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64LegalizerInfo.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 7
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h | 13
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 23
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.h | 2
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 21
-rw-r--r--  lib/Target/AArch64/CMakeLists.txt | 1
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 6
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp | 61
-rw-r--r--  lib/Target/AMDGPU/AMDGPU.h | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 252
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 9
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 32
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.h | 1
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.h | 4
-rw-r--r--  lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 5
-rw-r--r--  lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 1
-rw-r--r--  lib/Target/AMDGPU/SIFoldOperands.cpp | 1
-rw-r--r--  lib/Target/AMDGPU/SIFrameLowering.cpp | 11
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp | 119
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.h | 2
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.cpp | 24
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.h | 19
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.td | 2
-rw-r--r--  lib/Target/AMDGPU/SIInstructions.td | 2
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 68
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.h | 10
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.cpp | 4
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.td | 17
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 18
-rw-r--r--  lib/Target/AMDGPU/VOP2Instructions.td | 11
-rw-r--r--  lib/Target/AMDGPU/VOP3Instructions.td | 11
-rw-r--r--  lib/Target/AMDGPU/VOP3PInstructions.td | 10
-rw-r--r--  lib/Target/AMDGPU/VOPCInstructions.td | 24
-rw-r--r--  lib/Target/AMDGPU/VOPInstructions.td | 2
-rw-r--r--  lib/Target/ARM/ARM.td | 431
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 43
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 20
-rw-r--r--  lib/Target/ARM/ARMInstructionSelector.cpp | 23
-rw-r--r--  lib/Target/ARM/ARMLegalizerInfo.cpp | 8
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMRegisterBankInfo.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 2
-rw-r--r--  lib/Target/BPF/BPFISelLowering.cpp | 45
-rw-r--r--  lib/Target/BPF/BPFInstrInfo.td | 5
-rw-r--r--  lib/Target/Hexagon/HexagonBitSimplify.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonDepInstrInfo.td | 4
-rw-r--r--  lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonExpandCondsets.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonGenInsert.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonGenPredicate.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 147
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 4
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsics.td | 12
-rw-r--r--  lib/Target/Hexagon/HexagonOptAddrMode.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonPatterns.td | 92
-rw-r--r--  lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 47
-rw-r--r--  lib/Target/Hexagon/HexagonTargetObjectFile.h | 6
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 219
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp | 15
-rw-r--r--  lib/Target/Mips/Mips.td | 3
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 146
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 9
-rw-r--r--  lib/Target/Mips/MipsMTInstrFormats.td | 21
-rw-r--r--  lib/Target/Mips/MipsMTInstrInfo.td | 110
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 99
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.h | 3
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.cpp | 203
-rw-r--r--  lib/Target/Mips/MipsSchedule.td | 4
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 5
-rw-r--r--  lib/Target/Mips/MipsTargetStreamer.h | 3
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 28
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 18
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 10
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 22
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td | 90
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 30
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 2
-rw-r--r--  lib/Target/Sparc/Sparc.td | 6
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 13
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.td | 3
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.cpp | 1
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.h | 2
-rw-r--r--  lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 15
-rw-r--r--  lib/Target/SystemZ/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/SystemZ/SystemZFeatures.td | 58
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 104
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h | 7
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFP.td | 88
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFormats.td | 330
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 32
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td | 68
-rw-r--r--  lib/Target/SystemZ/SystemZInstrSystem.td | 4
-rw-r--r--  lib/Target/SystemZ/SystemZInstrVector.td | 366
-rw-r--r--  lib/Target/SystemZ/SystemZOperators.td | 20
-rw-r--r--  lib/Target/SystemZ/SystemZPatterns.td | 7
-rw-r--r--  lib/Target/SystemZ/SystemZProcessors.td | 3
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.td | 14
-rw-r--r--  lib/Target/SystemZ/SystemZSchedule.td | 3
-rw-r--r--  lib/Target/SystemZ/SystemZScheduleZ14.td | 1611
-rw-r--r--  lib/Target/SystemZ/SystemZScheduleZ196.td | 163
-rw-r--r--  lib/Target/SystemZ/SystemZScheduleZEC12.td | 161
-rw-r--r--  lib/Target/SystemZ/SystemZShortenInst.cpp | 40
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.cpp | 4
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.h | 34
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 4
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 3
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.h | 4
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 1
-rw-r--r--  lib/Target/X86/X86.h | 3
-rw-r--r--  lib/Target/X86/X86.td | 6
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 4
-rw-r--r--  lib/Target/X86/X86CmovConversion.cpp | 611
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 4
-rw-r--r--  lib/Target/X86/X86FixupBWInsts.cpp | 2
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 66
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td | 231
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 6
-rw-r--r--  lib/Target/X86/X86Schedule.td | 1
-rw-r--r--  lib/Target/X86/X86ScheduleBtVer2.td | 16
-rw-r--r--  lib/Target/X86/X86ScheduleZnver1.td | 223
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 2
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 1
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 2
-rw-r--r--  lib/ToolDrivers/CMakeLists.txt | 1
-rw-r--r--  lib/ToolDrivers/LLVMBuild.txt | 2
-rw-r--r--  lib/ToolDrivers/llvm-dlltool/CMakeLists.txt | 9
-rw-r--r--  lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp | 160
-rw-r--r--  lib/ToolDrivers/llvm-dlltool/LLVMBuild.txt | 22
-rw-r--r--  lib/ToolDrivers/llvm-dlltool/Options.td | 26
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 18
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 2
-rw-r--r--  lib/Transforms/IPO/SampleProfile.cpp | 11
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 56
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 36
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 5
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 6
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 76
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 38
-rw-r--r--  lib/Transforms/Instrumentation/MemorySanitizer.cpp | 4
-rw-r--r--  lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp | 16
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 1
-rw-r--r--  lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 72
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 82
-rw-r--r--  lib/Transforms/Scalar/LoopInterchange.cpp | 119
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp | 15
-rw-r--r--  lib/Transforms/Utils/LoopUnrollRuntime.cpp | 30
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 64
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp | 30
258 files changed, 8701 insertions, 2985 deletions
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
index 3ddefc6520a7..74b5d79ebac5 100644
--- a/lib/Analysis/CGSCCPassManager.cpp
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -433,7 +433,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
if (Visited.insert(C).second)
Worklist.push_back(C);
- LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) {
+ auto VisitRef = [&](Function &Referee) {
Node &RefereeN = *G.lookup(Referee);
Edge *E = N->lookup(RefereeN);
// FIXME: Similarly to new calls, we also currently preclude
@@ -444,7 +444,12 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
RetainedEdges.insert(&RefereeN);
if (E->isCall())
DemotedCallTargets.insert(&RefereeN);
- });
+ };
+ LazyCallGraph::visitReferences(Worklist, Visited, VisitRef);
+
+ // Include synthetic reference edges to known, defined lib functions.
+ for (auto *F : G.getLibFunctions())
+ VisitRef(*F);
// First remove all of the edges that are no longer present in this function.
// We have to build a list of dead targets first and then remove them as the
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index 5b6e2d0476e4..c08c6cfe0c3b 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -14,7 +14,8 @@
using namespace llvm;
namespace llvm {
-template class DominanceFrontierBase<BasicBlock>;
+template class DominanceFrontierBase<BasicBlock, false>;
+template class DominanceFrontierBase<BasicBlock, true>;
template class ForwardDominanceFrontierBase<BasicBlock>;
}
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 27c6b580e7ac..95ab6ee3db5b 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -26,7 +26,6 @@ using namespace llvm;
STATISTIC(TotalInsts , "Number of instructions (of all types)");
STATISTIC(TotalBlocks, "Number of basic blocks");
STATISTIC(TotalFuncs , "Number of non-external functions");
-STATISTIC(TotalMemInst, "Number of memory instructions");
#define HANDLE_INST(N, OPCODE, CLASS) \
STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
@@ -75,13 +74,6 @@ FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
// function.
//
bool InstCount::runOnFunction(Function &F) {
- unsigned StartMemInsts =
- NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
- NumInvokeInst + NumAllocaInst;
visit(F);
- unsigned EndMemInsts =
- NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
- NumInvokeInst + NumAllocaInst;
- TotalMemInst += EndMemInsts-StartMemInsts;
return false;
}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index f6632020b8fc..b4f3b87e1846 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1745,14 +1745,11 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Constant::getNullValue(Op0->getType());
// (A | ?) & A = A
- Value *A = nullptr, *B = nullptr;
- if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- (A == Op1 || B == Op1))
+ if (match(Op0, m_c_Or(m_Specific(Op1), m_Value())))
return Op1;
// A & (A | ?) = A
- if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
- (A == Op0 || B == Op0))
+ if (match(Op1, m_c_Or(m_Specific(Op0), m_Value())))
return Op0;
// A mask that only clears known zeros of a shifted value is a no-op.
@@ -1852,26 +1849,22 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Constant::getAllOnesValue(Op0->getType());
// (A & ?) | A = A
- Value *A = nullptr, *B = nullptr;
- if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
- (A == Op1 || B == Op1))
+ if (match(Op0, m_c_And(m_Specific(Op1), m_Value())))
return Op1;
// A | (A & ?) = A
- if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
- (A == Op0 || B == Op0))
+ if (match(Op1, m_c_And(m_Specific(Op0), m_Value())))
return Op0;
// ~(A & ?) | A = -1
- if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) &&
- (A == Op1 || B == Op1))
+ if (match(Op0, m_Not(m_c_And(m_Specific(Op1), m_Value()))))
return Constant::getAllOnesValue(Op1->getType());
// A | ~(A & ?) = -1
- if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) &&
- (A == Op0 || B == Op0))
+ if (match(Op1, m_Not(m_c_And(m_Specific(Op1), m_Value()))))
return Constant::getAllOnesValue(Op0->getType());
+ Value *A, *B;
// (A & ~B) | (A ^ B) -> (A ^ B)
// (~B & A) | (A ^ B) -> (A ^ B)
// (A & ~B) | (B ^ A) -> (B ^ A)
diff --git a/lib/Analysis/IteratedDominanceFrontier.cpp b/lib/Analysis/IteratedDominanceFrontier.cpp
index 0e02850df349..3992657417c5 100644
--- a/lib/Analysis/IteratedDominanceFrontier.cpp
+++ b/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -17,8 +17,8 @@
#include <queue>
namespace llvm {
-template <class NodeTy>
-void IDFCalculator<NodeTy>::calculate(
+template <class NodeTy, bool IsPostDom>
+void IDFCalculator<NodeTy, IsPostDom>::calculate(
SmallVectorImpl<BasicBlock *> &PHIBlocks) {
// Use a priority queue keyed on dominator tree level so that inserted nodes
// are handled from the bottom of the dominator tree upwards.
@@ -88,6 +88,6 @@ void IDFCalculator<NodeTy>::calculate(
}
}
-template class IDFCalculator<BasicBlock *>;
-template class IDFCalculator<Inverse<BasicBlock *>>;
+template class IDFCalculator<BasicBlock *, false>;
+template class IDFCalculator<Inverse<BasicBlock *>, true>;
}
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index a4c3e43b4b0c..d287f81985fd 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -106,6 +106,13 @@ LazyCallGraph::EdgeSequence &LazyCallGraph::Node::populateSlow() {
LazyCallGraph::Edge::Ref);
});
+ // Add implicit reference edges to any defined libcall functions (if we
+ // haven't found an explicit edge).
+ for (auto *F : G->LibFunctions)
+ if (!Visited.count(F))
+ addEdge(Edges->Edges, Edges->EdgeIndexMap, G->get(*F),
+ LazyCallGraph::Edge::Ref);
+
return *Edges;
}
@@ -120,15 +127,34 @@ LLVM_DUMP_METHOD void LazyCallGraph::Node::dump() const {
}
#endif
-LazyCallGraph::LazyCallGraph(Module &M) {
+static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) {
+ LibFunc LF;
+
+ // Either this is a normal library function or a "vectorizable" function.
+ return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName());
+}
+
+LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier()
<< "\n");
- for (Function &F : M)
- if (!F.isDeclaration() && !F.hasLocalLinkage()) {
- DEBUG(dbgs() << " Adding '" << F.getName()
- << "' to entry set of the graph.\n");
- addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref);
- }
+ for (Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+ // If this function is a known lib function to LLVM then we want to
+ // synthesize reference edges to it to model the fact that LLVM can turn
+ // arbitrary code into a library function call.
+ if (isKnownLibFunction(F, TLI))
+ LibFunctions.insert(&F);
+
+ if (F.hasLocalLinkage())
+ continue;
+
+ // External linkage defined functions have edges to them from other
+ // modules.
+ DEBUG(dbgs() << " Adding '" << F.getName()
+ << "' to entry set of the graph.\n");
+ addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref);
+ }
// Now add entry nodes for functions reachable via initializers to globals.
SmallVector<Constant *, 16> Worklist;
@@ -149,7 +175,8 @@ LazyCallGraph::LazyCallGraph(Module &M) {
LazyCallGraph::LazyCallGraph(LazyCallGraph &&G)
: BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)),
EntryEdges(std::move(G.EntryEdges)), SCCBPA(std::move(G.SCCBPA)),
- SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)) {
+ SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)),
+ LibFunctions(std::move(G.LibFunctions)) {
updateGraphPtrs();
}
@@ -160,6 +187,7 @@ LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) {
SCCBPA = std::move(G.SCCBPA);
SCCMap = std::move(G.SCCMap);
LeafRefSCCs = std::move(G.LeafRefSCCs);
+ LibFunctions = std::move(G.LibFunctions);
updateGraphPtrs();
return *this;
}
@@ -1580,6 +1608,11 @@ void LazyCallGraph::removeDeadFunction(Function &F) {
assert(F.use_empty() &&
"This routine should only be called on trivially dead functions!");
+ // We shouldn't remove library functions as they are never really dead while
+ // the call graph is in use -- every function definition refers to them.
+ assert(!isLibFunction(F) &&
+ "Must not remove lib functions from the call graph!");
+
auto NI = NodeMap.find(&F);
if (NI == NodeMap.end())
// Not in the graph at all!
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index baf932432a0a..697b58622bb4 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -609,7 +609,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
return NearLoop;
}
-LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) {
+LoopInfo::LoopInfo(const DomTreeBase<BasicBlock> &DomTree) {
analyze(DomTree);
}
diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index 86d0d92799f2..86de474c7aa9 100644
--- a/lib/Analysis/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -39,7 +39,6 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Transforms/Scalar.h"
#include <algorithm>
#define DEBUG_TYPE "memoryssa"
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index 1caf151546d9..811373ac850b 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -23,6 +23,8 @@ using namespace llvm;
#define DEBUG_TYPE "postdomtree"
+template class llvm::DominatorTreeBase<BasicBlock, true>; // PostDomTreeBase
+
//===----------------------------------------------------------------------===//
// PostDominatorTree Implementation
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 3fb1ab980add..b973203a89b6 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -4173,6 +4173,319 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
return None;
}
+/// Helper function to createAddRecFromPHIWithCasts. We have a phi
+/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
+/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
+/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
+/// follows one of the following patterns:
+/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
+/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
+/// If the SCEV expression of \p Op conforms with one of the expected patterns
+/// we return the type of the truncation operation, and indicate whether the
+/// truncated type should be treated as signed/unsigned by setting
+/// \p Signed to true/false, respectively.
+static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
+ bool &Signed, ScalarEvolution &SE) {
+
+ // The case where Op == SymbolicPHI (that is, with no type conversions on
+ // the way) is handled by the regular add recurrence creating logic and
+ // would have already been triggered in createAddRecForPHI. Reaching it here
+ // means that createAddRecFromPHI had failed for this PHI before (e.g.,
+ // because one of the other operands of the SCEVAddExpr updating this PHI is
+ // not invariant).
+ //
+ // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in
+ // this case predicates that allow us to prove that Op == SymbolicPHI will
+ // be added.
+ if (Op == SymbolicPHI)
+ return nullptr;
+
+ unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType());
+ unsigned NewBits = SE.getTypeSizeInBits(Op->getType());
+ if (SourceBits != NewBits)
+ return nullptr;
+
+ const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op);
+ const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op);
+ if (!SExt && !ZExt)
+ return nullptr;
+ const SCEVTruncateExpr *Trunc =
+ SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
+ : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
+ if (!Trunc)
+ return nullptr;
+ const SCEV *X = Trunc->getOperand();
+ if (X != SymbolicPHI)
+ return nullptr;
+ Signed = SExt ? true : false;
+ return Trunc->getType();
+}
+
+static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
+ if (!PN->getType()->isIntegerTy())
+ return nullptr;
+ const Loop *L = LI.getLoopFor(PN->getParent());
+ if (!L || L->getHeader() != PN->getParent())
+ return nullptr;
+ return L;
+}
+
+// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
+// computation that updates the phi follows the following pattern:
+// (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
+// which correspond to a phi->trunc->sext/zext->add->phi update chain.
+// If so, try to see if it can be rewritten as an AddRecExpr under some
+// Predicates. If successful, return them as a pair. Also cache the results
+// of the analysis.
+//
+// Example usage scenario:
+// Say the Rewriter is called for the following SCEV:
+// 8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
+// where:
+// %X = phi i64 (%Start, %BEValue)
+// It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X),
+// and call this function with %SymbolicPHI = %X.
+//
+// The analysis will find that the value coming around the backedge has
+// the following SCEV:
+// BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
+// Upon concluding that this matches the desired pattern, the function
+// will return the pair {NewAddRec, SmallPredsVec} where:
+// NewAddRec = {%Start,+,%Step}
+// SmallPredsVec = {P1, P2, P3} as follows:
+// P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)}<nsw> Flags: <nssw>
+// P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64)
+// P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64)
+// The returned pair means that SymbolicPHI can be rewritten into NewAddRec
+// under the predicates {P1,P2,P3}.
+// This predicated rewrite will be cached in PredicatedSCEVRewrites:
+// PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3)}
+//
+// TODO's:
+//
+// 1) Extend the Induction descriptor to also support inductions that involve
+// casts: When needed (namely, when we are called in the context of the
+// vectorizer induction analysis), a Set of cast instructions will be
+// populated by this method, and provided back to isInductionPHI. This is
+// needed to allow the vectorizer to properly record them to be ignored by
+// the cost model and to avoid vectorizing them (otherwise these casts,
+// which are redundant under the runtime overflow checks, will be
+// vectorized, which can be costly).
+//
+// 2) Support additional induction/PHISCEV patterns: We also want to support
+// inductions where the sext-trunc / zext-trunc operations (partly) occur
+// after the induction update operation (the induction increment):
+//
+// (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
+// which correspond to a phi->add->trunc->sext/zext->phi update chain.
+//
+// (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix)
+// which correspond to a phi->trunc->add->sext/zext->phi update chain.
+//
+// 3) Outline common code with createAddRecFromPHI to avoid duplication.
+//
+Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
+ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) {
+ SmallVector<const SCEVPredicate *, 3> Predicates;
+
+ // *** Part1: Analyze if we have a phi-with-cast pattern for which we can
+ // return an AddRec expression under some predicate.
+
+ auto *PN = cast<PHINode>(SymbolicPHI->getValue());
+ const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
+ assert (L && "Expecting an integer loop header phi");
+
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi as an addrec if it has a unique entry value and a unique
+ // backedge value.
+ Value *BEValueV = nullptr, *StartValueV = nullptr;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (L->contains(PN->getIncomingBlock(i))) {
+ if (!BEValueV) {
+ BEValueV = V;
+ } else if (BEValueV != V) {
+ BEValueV = nullptr;
+ break;
+ }
+ } else if (!StartValueV) {
+ StartValueV = V;
+ } else if (StartValueV != V) {
+ StartValueV = nullptr;
+ break;
+ }
+ }
+ if (!BEValueV || !StartValueV)
+ return None;
+
+ const SCEV *BEValue = getSCEV(BEValueV);
+
+ // If the value coming around the backedge is an add with the symbolic
+ // value we just inserted, possibly with casts that we can ignore under
+ // an appropriate runtime guard, then we found a simple induction variable!
+ const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);
+ if (!Add)
+ return None;
+
+ // If there is a single occurrence of the symbolic value, possibly
+ // casted, replace it with a recurrence.
+ unsigned FoundIndex = Add->getNumOperands();
+ Type *TruncTy = nullptr;
+ bool Signed;
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if ((TruncTy =
+ isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this)))
+ if (FoundIndex == e) {
+ FoundIndex = i;
+ break;
+ }
+
+ if (FoundIndex == Add->getNumOperands())
+ return None;
+
+ // Create an add with everything but the specified operand.
+ SmallVector<const SCEV *, 8> Ops;
+ for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+ if (i != FoundIndex)
+ Ops.push_back(Add->getOperand(i));
+ const SCEV *Accum = getAddExpr(Ops);
+
+ // The runtime checks will not be valid if the step amount is
+ // varying inside the loop.
+ if (!isLoopInvariant(Accum, L))
+ return None;
+
+
+ // *** Part2: Create the predicates
+
+ // Analysis was successful: we have a phi-with-cast pattern for which we
+ // can return an AddRec expression under the following predicates:
+ //
+ // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum)
+ // fits within the truncated type (does not overflow) for i = 0 to n-1.
+ // P2: An Equal predicate that guarantees that
+ // Start = (Ext ix (Trunc iy (Start) to ix) to iy)
+ // P3: An Equal predicate that guarantees that
+ // Accum = (Ext ix (Trunc iy (Accum) to ix) to iy)
+ //
+ // As we next prove, the above predicates guarantee that:
+ // Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy)
+ //
+ //
+ // More formally, we want to prove that:
+ // Expr(i+1) = Start + (i+1) * Accum
+ // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
+ //
+ // Given that:
+ // 1) Expr(0) = Start
+ // 2) Expr(1) = Start + Accum
+ // = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2
+ // 3) Induction hypothesis (step i):
+ // Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
+ //
+ // Proof:
+ // Expr(i+1) =
+ // = Start + (i+1)*Accum
+ // = (Start + i*Accum) + Accum
+ // = Expr(i) + Accum
+ // = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
+ // :: from step i
+ //
+ // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum
+ //
+ // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
+ // + (Ext ix (Trunc iy (Accum) to ix) to iy)
+ // + Accum :: from P3
+ //
+ // = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
+ // + Accum :: from P1: Ext(x)+Ext(y)=>Ext(x+y)
+ //
+ // = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum
+ // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
+ //
+ // By induction, the same applies to all iterations 1<=i<n:
+ //
+
+ // Create a truncated addrec for which we will add a no overflow check (P1).
+ const SCEV *StartVal = getSCEV(StartValueV);
+ const SCEV *PHISCEV =
+ getAddRecExpr(getTruncateExpr(StartVal, TruncTy),
+ getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);
+ const auto *AR = cast<SCEVAddRecExpr>(PHISCEV);
+
+ SCEVWrapPredicate::IncrementWrapFlags AddedFlags =
+ Signed ? SCEVWrapPredicate::IncrementNSSW
+ : SCEVWrapPredicate::IncrementNUSW;
+ const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags);
+ Predicates.push_back(AddRecPred);
+
+ // Create the Equal Predicates P2,P3:
+ auto AppendPredicate = [&](const SCEV *Expr) -> void {
+ assert (isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
+ const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
+ const SCEV *ExtendedExpr =
+ Signed ? getSignExtendExpr(TruncatedExpr, Expr->getType())
+ : getZeroExtendExpr(TruncatedExpr, Expr->getType());
+ if (Expr != ExtendedExpr &&
+ !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) {
+ const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr);
+ DEBUG (dbgs() << "Added Predicate: " << *Pred);
+ Predicates.push_back(Pred);
+ }
+ };
+
+ AppendPredicate(StartVal);
+ AppendPredicate(Accum);
+
+ // *** Part3: Predicates are ready. Now go ahead and create the new addrec in
+ // which the casts had been folded away. The caller can rewrite SymbolicPHI
+ // into NewAR if it will also add the runtime overflow checks specified in
+ // Predicates.
+ auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap);
+
+ std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite =
+ std::make_pair(NewAR, Predicates);
+ // Remember the result of the analysis for this SCEV at this location.
+ PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite;
+ return PredRewrite;
+}
+
+Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
+ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
+
+ auto *PN = cast<PHINode>(SymbolicPHI->getValue());
+ const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
+ if (!L)
+ return None;
+
+ // Check to see if we already analyzed this PHI.
+ auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L});
+ if (I != PredicatedSCEVRewrites.end()) {
+ std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
+ I->second;
+ // Analysis was done before and failed to create an AddRec:
+ if (Rewrite.first == SymbolicPHI)
+ return None;
+ // Analysis was done before and succeeded to create an AddRec under
+ // a predicate:
+ assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
+ assert(!(Rewrite.second).empty() && "Expected to find Predicates");
+ return Rewrite;
+ }
+
+ Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
+ Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI);
+
+ // Record in the cache that the analysis failed
+ if (!Rewrite) {
+ SmallVector<const SCEVPredicate *, 3> Predicates;
+ PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates};
+ return None;
+ }
+
+ return Rewrite;
+}
+
/// A helper function for createAddRecFromPHI to handle simple cases.
///
/// This function tries to find an AddRec expression for the simplest (yet most
@@ -5904,6 +6217,16 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
RemoveLoopFromBackedgeMap(BackedgeTakenCounts);
RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts);
+ // Drop information about predicated SCEV rewrites for this loop.
+ for (auto I = PredicatedSCEVRewrites.begin();
+ I != PredicatedSCEVRewrites.end();) {
+ std::pair<const SCEV *, const Loop *> Entry = I->first;
+ if (Entry.second == L)
+ PredicatedSCEVRewrites.erase(I++);
+ else
+ ++I;
+ }
+
// Drop information about expressions based on loop-header PHIs.
SmallVector<Instruction *, 16> Worklist;
PushLoopPHIs(L, Worklist);
@@ -10062,6 +10385,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
UniquePreds(std::move(Arg.UniquePreds)),
SCEVAllocator(std::move(Arg.SCEVAllocator)),
+ PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)),
FirstUnknown(Arg.FirstUnknown) {
Arg.FirstUnknown = nullptr;
}
@@ -10462,6 +10786,15 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
HasRecMap.erase(S);
MinTrailingZerosCache.erase(S);
+ for (auto I = PredicatedSCEVRewrites.begin();
+ I != PredicatedSCEVRewrites.end();) {
+ std::pair<const SCEV *, const Loop *> Entry = I->first;
+ if (Entry.first == S)
+ PredicatedSCEVRewrites.erase(I++);
+ else
+ ++I;
+ }
+
auto RemoveSCEVFromBackedgeMap =
[S, this](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
for (auto I = Map.begin(), E = Map.end(); I != E;) {
@@ -10621,10 +10954,11 @@ void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
-const SCEVPredicate *
-ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS,
- const SCEVConstant *RHS) {
+const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS,
+ const SCEV *RHS) {
FoldingSetNodeID ID;
+ assert(LHS->getType() == RHS->getType() &&
+ "Type mismatch between LHS and RHS");
// Unique this node based on the arguments
ID.AddInteger(SCEVPredicate::P_Equal);
ID.AddPointer(LHS);
@@ -10687,8 +11021,7 @@ public:
if (IPred->getLHS() == Expr)
return IPred->getRHS();
}
-
- return Expr;
+ return convertToAddRecWithPreds(Expr);
}
const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
@@ -10724,17 +11057,41 @@ public:
}
private:
- bool addOverflowAssumption(const SCEVAddRecExpr *AR,
- SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
- auto *A = SE.getWrapPredicate(AR, AddedFlags);
+ bool addOverflowAssumption(const SCEVPredicate *P) {
if (!NewPreds) {
// Check if we've already made this assumption.
- return Pred && Pred->implies(A);
+ return Pred && Pred->implies(P);
}
- NewPreds->insert(A);
+ NewPreds->insert(P);
return true;
}
+ bool addOverflowAssumption(const SCEVAddRecExpr *AR,
+ SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
+ auto *A = SE.getWrapPredicate(AR, AddedFlags);
+ return addOverflowAssumption(A);
+ }
+
+ // If \p Expr represents a PHINode, we try to see if it can be represented
+ // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible
+ // to add this predicate as a runtime overflow check, we return the AddRec.
+ // If \p Expr does not meet these conditions (is not a PHI node, or we
+ // couldn't create an AddRec for it, or couldn't add the predicate), we just
+ // return \p Expr.
+ const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
+ if (!isa<PHINode>(Expr->getValue()))
+ return Expr;
+ Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
+ PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr);
+ if (!PredicatedRewrite)
+ return Expr;
+ for (auto *P : PredicatedRewrite->second){
+ if (!addOverflowAssumption(P))
+ return Expr;
+ }
+ return PredicatedRewrite->first;
+ }
+
SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
SCEVUnionPredicate *Pred;
const Loop *L;
@@ -10771,9 +11128,11 @@ SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
: FastID(ID), Kind(Kind) {}
SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
- const SCEVUnknown *LHS,
- const SCEVConstant *RHS)
- : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {}
+ const SCEV *LHS, const SCEV *RHS)
+ : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {
+ assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match");
+ assert(LHS != RHS && "LHS and RHS are the same SCEV");
+}
bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
const auto *Op = dyn_cast<SCEVEqualPredicate>(N);
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 94bbc58541a7..25813c65037f 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -82,6 +82,11 @@ int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}
+int TargetTransformInfo::getExtCost(const Instruction *I,
+ const Value *Src) const {
+ return TTIImpl->getExtCost(I, Src);
+}
+
int TargetTransformInfo::getIntrinsicCost(
Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 428bb21fbf51..90e0d6a216ee 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -588,7 +588,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(spir_func);
KEYWORD(intel_ocl_bicc);
KEYWORD(x86_64_sysvcc);
- KEYWORD(x86_64_win64cc);
+ KEYWORD(win64cc);
KEYWORD(x86_regcallcc);
KEYWORD(webkit_jscc);
KEYWORD(swiftcc);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 717eb0e00f4f..13679ce1d25c 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1670,7 +1670,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'spir_func'
/// ::= 'spir_kernel'
/// ::= 'x86_64_sysvcc'
-/// ::= 'x86_64_win64cc'
+/// ::= 'win64cc'
/// ::= 'webkit_jscc'
/// ::= 'anyregcc'
/// ::= 'preserve_mostcc'
@@ -1712,7 +1712,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_spir_func: CC = CallingConv::SPIR_FUNC; break;
case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break;
case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break;
- case lltok::kw_x86_64_win64cc: CC = CallingConv::X86_64_Win64; break;
+ case lltok::kw_win64cc: CC = CallingConv::Win64; break;
case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break;
case lltok::kw_anyregcc: CC = CallingConv::AnyReg; break;
case lltok::kw_preserve_mostcc:CC = CallingConv::PreserveMost; break;
@@ -4411,13 +4411,15 @@ bool LLParser::ParseDIImportedEntity(MDNode *&Result, bool IsDistinct) {
REQUIRED(tag, DwarfTagField, ); \
REQUIRED(scope, MDField, ); \
OPTIONAL(entity, MDField, ); \
+ OPTIONAL(file, MDField, ); \
OPTIONAL(line, LineField, ); \
OPTIONAL(name, MDStringField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- Result = GET_OR_DISTINCT(DIImportedEntity, (Context, tag.Val, scope.Val,
- entity.Val, line.Val, name.Val));
+ Result = GET_OR_DISTINCT(
+ DIImportedEntity,
+ (Context, tag.Val, scope.Val, entity.Val, file.Val, line.Val, name.Val));
return false;
}
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 9c7a06de81b4..0f3707ba0d1e 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -141,7 +141,7 @@ enum Kind {
kw_spir_kernel,
kw_spir_func,
kw_x86_64_sysvcc,
- kw_x86_64_win64cc,
+ kw_win64cc,
kw_webkit_jscc,
kw_anyregcc,
kw_swiftcc,
diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp
index b1504a8034e0..10fbcdea784f 100644
--- a/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1671,15 +1671,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_IMPORTED_ENTITY: {
- if (Record.size() != 6)
+ if (Record.size() != 6 && Record.size() != 7)
return error("Invalid record");
IsDistinct = Record[0];
+ bool HasFile = (Record.size() == 7);
MetadataList.assignValue(
GET_OR_DISTINCT(DIImportedEntity,
(Context, Record[1], getMDOrNull(Record[2]),
- getDITypeRefOrNull(Record[3]), Record[4],
- getMDString(Record[5]))),
+ getDITypeRefOrNull(Record[3]),
+ HasFile ? getMDOrNull(Record[6]) : nullptr,
+ HasFile ? Record[4] : 0, getMDString(Record[5]))),
NextMetadataNo);
NextMetadataNo++;
break;
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 0e518d2bbc8f..dcffde1742cd 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1718,6 +1718,7 @@ void ModuleBitcodeWriter::writeDIImportedEntity(
Record.push_back(VE.getMetadataOrNullID(N->getEntity()));
Record.push_back(N->getLine());
Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawFile()));
Stream.EmitRecord(bitc::METADATA_IMPORTED_ENTITY, Record, Abbrev);
Record.clear();
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index d4a90eeabe15..676c48fe5c67 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -664,8 +664,9 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
else
EntityDie = getDIE(Entity);
assert(EntityDie);
- addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(),
- Module->getScope()->getDirectory());
+ auto *File = Module->getFile();
+ addSourceLine(*IMDie, Module->getLine(), File ? File->getFilename() : "",
+ File ? File->getDirectory() : "");
addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
StringRef Name = Module->getName();
if (!Name.empty())
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index b7155ac2480a..45dc13d58de7 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -4267,9 +4267,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Use a worklist to iteratively look through PHI nodes, and ensure that
// the addressing mode obtained from the non-PHI roots of the graph
// are equivalent.
- Value *Consensus = nullptr;
- unsigned NumUsesConsensus = 0;
- bool IsNumUsesConsensusValid = false;
+ bool AddrModeFound = false;
bool PhiSeen = false;
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode;
@@ -4280,11 +4278,17 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Value *V = worklist.back();
worklist.pop_back();
- // Break use-def graph loops.
- if (!Visited.insert(V).second) {
- Consensus = nullptr;
- break;
- }
+ // We allow traversing cyclic Phi nodes.
+ // In case of success after this loop we ensure that traversing through
+ // Phi nodes ends up with all cases to compute address of the form
+ // BaseGV + Base + Scale * Index + Offset
+ // where Scale and Offset are constants and BaseGV, Base and Index
+ // are exactly the same Values in all cases.
+ // It means that BaseGV, Scale and Offset dominate our memory instruction
+ // and have the same value as they had in address computation represented
+ // as Phi. So we can safely sink address computation to memory instruction.
+ if (!Visited.insert(V).second)
+ continue;
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
@@ -4297,47 +4301,26 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// For non-PHIs, determine the addressing mode being computed. Note that
// the result may differ depending on what other uses our candidate
// addressing instructions might have.
- SmallVector<Instruction*, 16> NewAddrModeInsts;
+ AddrModeInsts.clear();
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TLI, *TRI,
- InsertedInsts, PromotedInsts, TPT);
-
- // This check is broken into two cases with very similar code to avoid using
- // getNumUses() as much as possible. Some values have a lot of uses, so
- // calling getNumUses() unconditionally caused a significant compile-time
- // regression.
- if (!Consensus) {
- Consensus = V;
- AddrMode = NewAddrMode;
- AddrModeInsts = NewAddrModeInsts;
- continue;
- } else if (NewAddrMode == AddrMode) {
- if (!IsNumUsesConsensusValid) {
- NumUsesConsensus = Consensus->getNumUses();
- IsNumUsesConsensusValid = true;
- }
+ V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
+ InsertedInsts, PromotedInsts, TPT);
- // Ensure that the obtained addressing mode is equivalent to that obtained
- // for all other roots of the PHI traversal. Also, when choosing one
- // such root as representative, select the one with the most uses in order
- // to keep the cost modeling heuristics in AddressingModeMatcher
- // applicable.
- unsigned NumUses = V->getNumUses();
- if (NumUses > NumUsesConsensus) {
- Consensus = V;
- NumUsesConsensus = NumUses;
- AddrModeInsts = NewAddrModeInsts;
- }
+ if (!AddrModeFound) {
+ AddrModeFound = true;
+ AddrMode = NewAddrMode;
continue;
}
+ if (NewAddrMode == AddrMode)
+ continue;
- Consensus = nullptr;
+ AddrModeFound = false;
break;
}
// If the addressing mode couldn't be determined, or if multiple different
// ones were determined, bail out now.
- if (!Consensus) {
+ if (!AddrModeFound) {
TPT.rollback(LastKnownGood);
return false;
}
@@ -4847,25 +4830,7 @@ bool CodeGenPrepare::canFormExtLd(
if (!HasPromoted && LI->getParent() == Inst->getParent())
return false;
- EVT VT = TLI->getValueType(*DL, Inst->getType());
- EVT LoadVT = TLI->getValueType(*DL, LI->getType());
-
- // If the load has other users and the truncate is not free, this probably
- // isn't worthwhile.
- if (!LI->hasOneUse() && (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
- !TLI->isTruncateFree(Inst->getType(), LI->getType()))
- return false;
-
- // Check whether the target supports casts folded into loads.
- unsigned LType;
- if (isa<ZExtInst>(Inst))
- LType = ISD::ZEXTLOAD;
- else {
- assert(isa<SExtInst>(Inst) && "Unexpected ext type!");
- LType = ISD::SEXTLOAD;
- }
-
- return TLI->isLoadExtLegal(LType, VT, LoadVT);
+ return TLI->isExtLoad(LI, Inst, *DL);
}
/// Move a zext or sext fed by a load into the same basic block as the load,
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 49fb5e8f075b..5258370e6680 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -433,9 +433,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
}
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM:
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR: {
unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV ||
+ MI.getOpcode() == TargetOpcode::G_SREM ||
MI.getOpcode() == TargetOpcode::G_ASHR
? TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT;
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index c176de16b593..e6f80dbb8630 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -11,8 +11,6 @@
// instructions do not lengthen the critical path or the resource depth.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "machine-combiner"
-
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -32,6 +30,8 @@
using namespace llvm;
+#define DEBUG_TYPE "machine-combiner"
+
STATISTIC(NumInstCombined, "Number of machineinst combined");
namespace {
diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp
index 28ecc8f96805..b559e4e513a6 100644
--- a/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -15,7 +15,8 @@
using namespace llvm;
namespace llvm {
-template class DominanceFrontierBase<MachineBasicBlock>;
+template class DominanceFrontierBase<MachineBasicBlock, false>;
+template class DominanceFrontierBase<MachineBasicBlock, true>;
template class ForwardDominanceFrontierBase<MachineBasicBlock>;
}
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 65e9e5d195a4..845e8232477c 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -31,7 +31,7 @@ static cl::opt<bool, true> VerifyMachineDomInfoX(
namespace llvm {
template class DomTreeNodeBase<MachineBasicBlock>;
-template class DominatorTreeBase<MachineBasicBlock>;
+template class DominatorTreeBase<MachineBasicBlock, false>; // DomTreeBase
}
char MachineDominatorTree::ID = 0;
@@ -49,7 +49,7 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
CriticalEdgesToSplit.clear();
NewBBs.clear();
- DT.reset(new DominatorTreeBase<MachineBasicBlock>(false));
+ DT.reset(new DomTreeBase<MachineBasicBlock>());
DT->recalculate(F);
return false;
}
@@ -144,7 +144,7 @@ void MachineDominatorTree::verifyDomTree() const {
return;
MachineFunction &F = *getRoot()->getParent();
- DominatorTreeBase<MachineBasicBlock> OtherDT(false);
+ DomTreeBase<MachineBasicBlock> OtherDT;
OtherDT.recalculate(F);
if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() ||
DT->compare(OtherDT)) {
diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp
index c3f6e9249e7d..488377998cb3 100644
--- a/lib/CodeGen/MachinePostDominators.cpp
+++ b/lib/CodeGen/MachinePostDominators.cpp
@@ -16,6 +16,10 @@
using namespace llvm;
+namespace llvm {
+template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTreeBase
+}
+
char MachinePostDominatorTree::ID = 0;
//declare initializeMachinePostDominatorTreePass
@@ -24,8 +28,7 @@ INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
- DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
- // postdominator
+ DT = new PostDomTreeBase<MachineBasicBlock>();
}
FunctionPass *
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 71382c18fdf9..d5d3f7a61a9f 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3889,9 +3889,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Note: the SimplifyDemandedBits fold below can make an information-losing
// transform, and then we have no way to find this better fold.
if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
- ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
- SDValue SubRHS = N0.getOperand(1);
- if (SubLHS && SubLHS->isNullValue()) {
+ if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
+ SDValue SubRHS = N0.getOperand(1);
if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
return SubRHS;
@@ -4586,6 +4585,20 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
return nullptr;
}
+// if Left + Right == Sum (constant or constant splat vector)
+static bool sumMatchConstant(SDValue Left, SDValue Right, unsigned Sum,
+ SelectionDAG &DAG, const SDLoc &DL) {
+ EVT ShiftVT = Left.getValueType();
+ if (ShiftVT != Right.getValueType()) return false;
+
+ SDValue ShiftSum = DAG.FoldConstantArithmetic(ISD::ADD, DL, ShiftVT,
+ Left.getNode(), Right.getNode());
+ if (!ShiftSum) return false;
+
+ ConstantSDNode *CSum = isConstOrConstSplat(ShiftSum);
+ return CSum && CSum->getZExtValue() == Sum;
+}
+
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
@@ -4631,30 +4644,24 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
- if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
- uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
- uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
- if ((LShVal + RShVal) != EltSizeInBits)
- return nullptr;
-
+ if (sumMatchConstant(LHSShiftAmt, RHSShiftAmt, EltSizeInBits, DAG, DL)) {
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
- SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
+ SDValue Mask = AllOnes;
if (LHSMask.getNode()) {
- APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
+ SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
- DAG.getNode(ISD::OR, DL, VT, LHSMask,
- DAG.getConstant(RHSBits, DL, VT)));
+ DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
}
if (RHSMask.getNode()) {
- APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
+ SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
- DAG.getNode(ISD::OR, DL, VT, RHSMask,
- DAG.getConstant(LHSBits, DL, VT)));
+ DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
}
Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
@@ -5272,11 +5279,21 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ unsigned Bitsize = VT.getScalarSizeInBits();
// fold (rot x, 0) -> x
if (isNullConstantOrNullSplatConstant(N1))
return N0;
+ // fold (rot x, c) -> (rot x, c % BitSize)
+ if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
+ if (Cst->getAPIntValue().uge(Bitsize)) {
+ uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
+ return DAG.getNode(N->getOpcode(), dl, VT, N0,
+ DAG.getConstant(RotAmt, dl, N1.getValueType()));
+ }
+ }
+
// fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
@@ -5286,22 +5303,24 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
unsigned NextOp = N0.getOpcode();
// fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
- if (NextOp == ISD::ROTL || NextOp == ISD::ROTR)
- if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1))
- if (SDNode *C2 =
- DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
- bool SameSide = (N->getOpcode() == NextOp);
- unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
- if (SDValue CombinedShift =
- DAG.FoldConstantArithmetic(CombineOp, dl, VT, C1, C2)) {
- unsigned Bitsize = VT.getScalarSizeInBits();
- SDValue BitsizeC = DAG.getConstant(Bitsize, dl, VT);
- SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
- ISD::SREM, dl, VT, CombinedShift.getNode(), BitsizeC.getNode());
- return DAG.getNode(
- N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm);
- }
+ if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
+ SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
+ SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
+ if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
+ EVT ShiftVT = C1->getValueType(0);
+ bool SameSide = (N->getOpcode() == NextOp);
+ unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
+ if (SDValue CombinedShift =
+ DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
+ SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
+ SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
+ ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
+ BitsizeC.getNode());
+ return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
+ CombinedShiftNorm);
}
+ }
+ }
return SDValue();
}
@@ -7152,8 +7171,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
- CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
- return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked!
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ CombineTo(N, ExtLoad);
+ if (NoReplaceTrunc)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ else
+ CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ return SDValue(N, 0);
}
}
@@ -7210,8 +7235,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
- CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- return CombineTo(N, And); // Return N so it doesn't get rechecked!
+ bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ CombineTo(N, And);
+ if (NoReplaceTrunc)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ else
+ CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ return SDValue(N,0); // Return N so it doesn't get rechecked!
}
}
}
@@ -7451,8 +7481,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
- CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
- return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked!
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ CombineTo(N, ExtLoad);
+ if (NoReplaceTrunc)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ else
+ CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -7503,8 +7539,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
- CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- return CombineTo(N, And); // Return N so it doesn't get rechecked!
+ bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ CombineTo(N, And);
+ if (NoReplaceTrunc)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ else
+ CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ return SDValue(N,0); // Return N so it doesn't get rechecked!
}
}
}
@@ -7676,13 +7717,18 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
- CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
- CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
ISD::ANY_EXTEND);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceTrunc = N0.hasOneUse();
+ CombineTo(N, ExtLoad);
+ if (NoReplaceTrunc)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ else
+ CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -11373,12 +11419,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
MVT::Other, Chain, ReplLoad.getValue(1));
- // Make sure the new and old chains are cleaned up.
- AddToWorklist(Token.getNode());
-
- // Replace uses with load result and token factor. Don't add users
- // to work list.
- return CombineTo(N, ReplLoad.getValue(0), Token, false);
+ // Replace uses with load result and token factor
+ return CombineTo(N, ReplLoad.getValue(0), Token);
}
}
@@ -12744,7 +12786,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
!NoVectors) {
// Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
+ unsigned Elts = i + 1;
+ if (MemVT.isVector()) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
@@ -13003,7 +13050,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
AddToWorklist(NewStoreChain.getNode());
- MachineMemOperand::Flags MMOFlags = isDereferenceable ?
+ MachineMemOperand::Flags MMOFlags = isDereferenceable ?
MachineMemOperand::MODereferenceable:
MachineMemOperand::MONone;
@@ -16703,6 +16750,20 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
+ // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+ // able to calculate their relative offset if at least one arises
+ // from an alloca. However, these allocas cannot overlap and we
+ // can infer there is no alias.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+      // If the bases are the same frame index but we couldn't find a
+      // constant offset (the indices are different), be conservative.
+ if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+ !MFI.isFixedObjectIndex(B->getIndex())))
+ return false;
+ }
+
// FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
// modified to use BaseIndexOffset.
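
As an illustrative aside (not part of the patch): the visitRotate() hunk above adds the fold (rot x, c) -> (rot x, c % BitSize), which is valid because rotating an N-bit value by N bits is the identity. A tiny self-contained check of that identity in plain C++ (not DAG code):

    // Demonstrates the identity behind the (rot x, c) -> (rot x, c % BitSize)
    // fold: rotating a 32-bit value by 37 equals rotating it by 37 % 32 = 5.
    #include <cassert>
    #include <cstdint>

    static uint32_t rotl1(uint32_t V) { return (V << 1) | (V >> 31); }

    static uint32_t rotlBy(uint32_t V, unsigned Amt) {
      while (Amt--)
        V = rotl1(V); // naive: rotate one bit at a time
      return V;
    }

    int main() {
      uint32_t X = 0x12345678u;
      assert(rotlBy(X, 37) == rotlBy(X, 37 % 32));
      return 0;
    }
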
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index ac3247948169..75fec7bd1d48 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1827,10 +1827,11 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
ISD::UADDO : ISD::USUBO,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
+
if (hasOVF) {
EVT OvfVT = getSetCCResultType(NVT);
SDVTList VTList = DAG.getVTList(NVT, OvfVT);
- TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
int RevOpc;
if (N->getOpcode() == ISD::ADD) {
RevOpc = ISD::SUB;
@@ -1863,6 +1864,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
ISD::SETULT);
+
+ if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) {
+ SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
+ return;
+ }
+
SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
DAG.getConstant(1, dl, NVT),
DAG.getConstant(0, dl, NVT));
@@ -1877,9 +1885,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue Cmp =
DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
LoOps[0], LoOps[1], ISD::SETULT);
- SDValue Borrow = DAG.getSelect(dl, NVT, Cmp,
- DAG.getConstant(1, dl, NVT),
- DAG.getConstant(0, dl, NVT));
+
+ SDValue Borrow;
+ if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
+ Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT);
+ else
+ Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
+
Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
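
As an illustrative aside (not part of the patch): the ExpandIntRes_ADDSUB hunks above exploit ZeroOrOneBooleanContent — the SETULT result is already 0 or 1, so it can be zero-extended and added as the carry/borrow directly, making the removed select of 1/0 unnecessary. The same trick for a 128-bit add split into two 64-bit halves:

    // Sketch of the expansion: the carry is just the zero-extended compare.
    #include <cassert>
    #include <cstdint>

    struct U128 { uint64_t Lo, Hi; };

    static U128 add128(U128 A, U128 B) {
      U128 R;
      R.Lo = A.Lo + B.Lo;
      uint64_t Carry = R.Lo < A.Lo; // setcc(ult), zero-extended: 0 or 1
      R.Hi = A.Hi + B.Hi + Carry;   // Hi = ADD(ADD(HiL, HiR), Carry)
      return R;
    }

    int main() {
      U128 A{~0ull, 1}, B{1, 0};
      U128 R = add128(A, B);
      assert(R.Lo == 0 && R.Hi == 2); // carry propagated into the high half
    }
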
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 1a8d5a4f45da..0b4c6e551667 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -142,9 +142,9 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
return false; // Invalid value for threshold.
// Count the number of MachineInstr`s in MachineFunction
- int64_t MICount = 0;
- for (const auto& MBB : MF)
- MICount += MBB.size();
+ int64_t MICount = 0;
+ for (const auto& MBB : MF)
+ MICount += MBB.size();
// Check if we have a loop.
// FIXME: Maybe make this smarter, and see whether the loops are dependent
diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index 22f166a2335d..79b9fdefd40e 100644
--- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -14,7 +14,6 @@
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
-#include "llvm/DebugInfo/CodeView/TypeServerHandler.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamReader.h"
@@ -42,13 +41,6 @@ static Error visitKnownMember(CVMemberRecord &Record,
return Error::success();
}
-static Expected<TypeServer2Record> deserializeTypeServerRecord(CVType &Record) {
- TypeServer2Record R(TypeRecordKind::TypeServer2);
- if (auto EC = TypeDeserializer::deserializeAs(Record, R))
- return std::move(EC);
- return R;
-}
-
static Error visitMemberRecord(CVMemberRecord &Record,
TypeVisitorCallbacks &Callbacks) {
if (auto EC = Callbacks.visitMemberBegin(Record))
@@ -84,8 +76,6 @@ class CVTypeVisitor {
public:
explicit CVTypeVisitor(TypeVisitorCallbacks &Callbacks);
- void addTypeServerHandler(TypeServerHandler &Handler);
-
Error visitTypeRecord(CVType &Record, TypeIndex Index);
Error visitTypeRecord(CVType &Record);
@@ -98,45 +88,15 @@ public:
Error visitFieldListMemberStream(BinaryStreamReader &Stream);
private:
- Expected<bool> handleTypeServer(CVType &Record);
Error finishVisitation(CVType &Record);
/// The interface to the class that gets notified of each visitation.
TypeVisitorCallbacks &Callbacks;
-
- TinyPtrVector<TypeServerHandler *> Handlers;
};
CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks)
: Callbacks(Callbacks) {}
-void CVTypeVisitor::addTypeServerHandler(TypeServerHandler &Handler) {
- Handlers.push_back(&Handler);
-}
-
-Expected<bool> CVTypeVisitor::handleTypeServer(CVType &Record) {
- if (Record.Type == TypeLeafKind::LF_TYPESERVER2 && !Handlers.empty()) {
- auto TS = deserializeTypeServerRecord(Record);
- if (!TS)
- return TS.takeError();
-
- for (auto Handler : Handlers) {
- auto ExpectedResult = Handler->handle(*TS, Callbacks);
- // If there was an error, return the error.
- if (!ExpectedResult)
- return ExpectedResult.takeError();
-
- // If the handler processed the record, return success.
- if (*ExpectedResult)
- return true;
-
- // Otherwise keep searching for a handler, eventually falling out and
- // using the default record handler.
- }
- }
- return false;
-}
-
Error CVTypeVisitor::finishVisitation(CVType &Record) {
switch (Record.Type) {
default:
@@ -163,12 +123,6 @@ Error CVTypeVisitor::finishVisitation(CVType &Record) {
}
Error CVTypeVisitor::visitTypeRecord(CVType &Record, TypeIndex Index) {
- auto ExpectedResult = handleTypeServer(Record);
- if (!ExpectedResult)
- return ExpectedResult.takeError();
- if (*ExpectedResult)
- return Error::success();
-
if (auto EC = Callbacks.visitTypeBegin(Record, Index))
return EC;
@@ -176,12 +130,6 @@ Error CVTypeVisitor::visitTypeRecord(CVType &Record, TypeIndex Index) {
}
Error CVTypeVisitor::visitTypeRecord(CVType &Record) {
- auto ExpectedResult = handleTypeServer(Record);
- if (!ExpectedResult)
- return ExpectedResult.takeError();
- if (*ExpectedResult)
- return Error::success();
-
if (auto EC = Callbacks.visitTypeBegin(Record))
return EC;
@@ -271,52 +219,37 @@ struct VisitHelper {
Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index,
TypeVisitorCallbacks &Callbacks,
- VisitorDataSource Source,
- TypeServerHandler *TS) {
+ VisitorDataSource Source) {
VisitHelper V(Callbacks, Source);
- if (TS)
- V.Visitor.addTypeServerHandler(*TS);
return V.Visitor.visitTypeRecord(Record, Index);
}
Error llvm::codeview::visitTypeRecord(CVType &Record,
TypeVisitorCallbacks &Callbacks,
- VisitorDataSource Source,
- TypeServerHandler *TS) {
+ VisitorDataSource Source) {
VisitHelper V(Callbacks, Source);
- if (TS)
- V.Visitor.addTypeServerHandler(*TS);
return V.Visitor.visitTypeRecord(Record);
}
Error llvm::codeview::visitTypeStream(const CVTypeArray &Types,
TypeVisitorCallbacks &Callbacks,
- VisitorDataSource Source,
- TypeServerHandler *TS) {
+ VisitorDataSource Source) {
VisitHelper V(Callbacks, Source);
- if (TS)
- V.Visitor.addTypeServerHandler(*TS);
return V.Visitor.visitTypeStream(Types);
}
Error llvm::codeview::visitTypeStream(CVTypeRange Types,
- TypeVisitorCallbacks &Callbacks,
- TypeServerHandler *TS) {
+ TypeVisitorCallbacks &Callbacks) {
VisitHelper V(Callbacks, VDS_BytesPresent);
- if (TS)
- V.Visitor.addTypeServerHandler(*TS);
return V.Visitor.visitTypeStream(Types);
}
Error llvm::codeview::visitTypeStream(TypeCollection &Types,
- TypeVisitorCallbacks &Callbacks,
- TypeServerHandler *TS) {
+ TypeVisitorCallbacks &Callbacks) {
// When the internal visitor calls Types.getType(Index) the interface is
// required to return a CVType with the bytes filled out. So we can assume
// that the bytes will be present when individual records are visited.
VisitHelper V(Callbacks, VDS_BytesPresent);
- if (TS)
- V.Visitor.addTypeServerHandler(*TS);
return V.Visitor.visitTypeStream(Types);
}
diff --git a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
index 711144fc2faa..4fc14480578e 100644
--- a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
+++ b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
@@ -168,18 +168,19 @@ Error CodeViewRecordIO::mapStringZ(StringRef &Value) {
return Error::success();
}
-Error CodeViewRecordIO::mapGuid(StringRef &Guid) {
+Error CodeViewRecordIO::mapGuid(GUID &Guid) {
constexpr uint32_t GuidSize = 16;
if (maxFieldLength() < GuidSize)
return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
if (isWriting()) {
- assert(Guid.size() == 16 && "Invalid Guid Size!");
- if (auto EC = Writer->writeFixedString(Guid))
+ if (auto EC = Writer->writeBytes(Guid.Guid))
return EC;
} else {
- if (auto EC = Reader->readFixedString(Guid, 16))
+ ArrayRef<uint8_t> GuidBytes;
+ if (auto EC = Reader->readBytes(GuidBytes, GuidSize))
return EC;
+ memcpy(Guid.Guid, GuidBytes.data(), GuidSize);
}
return Error::success();
}
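
As an illustrative aside (not part of the patch): mapGuid() above now moves 16 raw bytes in and out of a dedicated GUID type instead of a 16-character StringRef. A simplified sketch of that round trip; the struct and helpers are stand-ins, not the llvm::codeview API:

    // Simplified serialization of a 16-byte GUID as raw bytes, mirroring the
    // mapGuid() change above; names here are purely illustrative.
    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct GUID { uint8_t Guid[16]; };

    static void writeGuid(std::vector<uint8_t> &Out, const GUID &G) {
      Out.insert(Out.end(), G.Guid, G.Guid + 16);
    }

    static GUID readGuid(const std::vector<uint8_t> &In, size_t Offset) {
      GUID G;
      std::memcpy(G.Guid, In.data() + Offset, 16); // same memcpy as the patch
      return G;
    }

    int main() {
      GUID G{};
      for (int I = 0; I < 16; ++I) G.Guid[I] = uint8_t(I);
      std::vector<uint8_t> Buf;
      writeGuid(Buf, G);
      GUID Back = readGuid(Buf, 0);
      assert(std::memcmp(G.Guid, Back.Guid, 16) == 0);
    }
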
diff --git a/lib/DebugInfo/CodeView/Formatters.cpp b/lib/DebugInfo/CodeView/Formatters.cpp
index 1fa8d219d6ac..b8d89c76da3b 100644
--- a/lib/DebugInfo/CodeView/Formatters.cpp
+++ b/lib/DebugInfo/CodeView/Formatters.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/CodeView/Formatters.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/GUID.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -39,3 +40,9 @@ void GuidAdapter::format(raw_ostream &Stream, StringRef Style) {
}
Stream << "}";
}
+
+raw_ostream &llvm::codeview::operator<<(raw_ostream &OS, const GUID &Guid) {
+ codeview::detail::GuidAdapter A(Guid.Guid);
+ A.format(OS, "");
+ return OS;
+}
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index c2c02f8de03f..62e73acc72d6 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -186,7 +186,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
BuildInfoSym &BuildInfo) {
- W.printNumber("BuildId", BuildInfo.BuildId);
+ printTypeIndex("BuildId", BuildInfo.BuildId);
return Error::success();
}
diff --git a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
index 589966705015..e18a35ca1f38 100644
--- a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
+++ b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
@@ -354,7 +354,7 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, FuncIdRecord &Func) {
}
Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, TypeServer2Record &TS) {
- W->printString("Guid", formatv("{0}", fmt_guid(TS.getGuid())).str());
+ W->printString("Guid", formatv("{0}", TS.getGuid()).str());
W->printNumber("Age", TS.getAge());
W->printString("Name", TS.getName());
return Error::success();
diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index 71a0966df036..bff3516203a0 100644
--- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -10,13 +10,11 @@
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -57,56 +55,35 @@ namespace {
/// streams: an item (or IPI) stream and a type stream, as this is what is
/// actually stored in the final PDB. We choose which records go where by
/// looking at the record kind.
-class TypeStreamMerger : public TypeVisitorCallbacks {
+class TypeStreamMerger {
public:
- explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest,
- TypeServerHandler *Handler)
- : Handler(Handler), IndexMap(SourceToDest) {
+ explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest)
+ : IndexMap(SourceToDest) {
SourceToDest.clear();
}
static const TypeIndex Untranslated;
- Error visitTypeBegin(CVType &Record) override;
- Error visitTypeEnd(CVType &Record) override;
-
Error mergeTypesAndIds(TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes,
- const CVTypeArray &IdsAndTypes);
+ const CVTypeArray &IdsAndTypes);
Error mergeIdRecords(TypeTableBuilder &Dest,
ArrayRef<TypeIndex> TypeSourceToDest,
- const CVTypeArray &Ids);
+ const CVTypeArray &Ids);
Error mergeTypeRecords(TypeTableBuilder &Dest, const CVTypeArray &Types);
private:
Error doit(const CVTypeArray &Types);
+ Error remapAllTypes(const CVTypeArray &Types);
+
+ Error remapType(const CVType &Type);
+
void addMapping(TypeIndex Idx);
bool remapTypeIndex(TypeIndex &Idx);
bool remapItemIndex(TypeIndex &Idx);
- bool remapIndices(RemappedType &Record, ArrayRef<TiReference> Refs) {
- auto OriginalData = Record.OriginalRecord.content();
- bool Success = true;
- for (auto &Ref : Refs) {
- uint32_t Offset = Ref.Offset;
- ArrayRef<uint8_t> Bytes =
- OriginalData.slice(Ref.Offset, sizeof(TypeIndex));
- ArrayRef<TypeIndex> TIs(reinterpret_cast<const TypeIndex *>(Bytes.data()),
- Ref.Count);
- for (auto TI : TIs) {
- TypeIndex NewTI = TI;
- bool ThisSuccess = (Ref.Kind == TiRefKind::IndexRef)
- ? remapItemIndex(NewTI)
- : remapTypeIndex(NewTI);
- if (ThisSuccess && NewTI != TI)
- Record.Mappings.emplace_back(Offset, NewTI);
- Offset += sizeof(TypeIndex);
- Success &= ThisSuccess;
- }
- }
- return Success;
- }
+ bool remapIndices(RemappedType &Record, ArrayRef<TiReference> Refs);
bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map);
@@ -128,21 +105,6 @@ private:
return Error::success();
}
- Error writeTypeRecord(const CVType &Record) {
- TypeIndex DestIdx =
- DestTypeStream->writeSerializedRecord(Record.RecordData);
- addMapping(DestIdx);
- return Error::success();
- }
-
- Error writeTypeRecord(const RemappedType &Record, bool RemapSuccess) {
- return writeRecord(*DestTypeStream, Record, RemapSuccess);
- }
-
- Error writeIdRecord(const RemappedType &Record, bool RemapSuccess) {
- return writeRecord(*DestIdStream, Record, RemapSuccess);
- }
-
Optional<Error> LastError;
bool IsSecondPass = false;
@@ -153,7 +115,6 @@ private:
TypeTableBuilder *DestIdStream = nullptr;
TypeTableBuilder *DestTypeStream = nullptr;
- TypeServerHandler *Handler = nullptr;
// If we're only mapping id records, this array contains the mapping for
// type records.
@@ -168,12 +129,8 @@ private:
const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
-Error TypeStreamMerger::visitTypeBegin(CVType &Rec) {
- RemappedType R(Rec);
- SmallVector<TiReference, 32> Refs;
- discoverTypeIndices(Rec.RecordData, Refs);
- bool Success = remapIndices(R, Refs);
- switch (Rec.kind()) {
+static bool isIdRecord(TypeLeafKind K) {
+ switch (K) {
case TypeLeafKind::LF_FUNC_ID:
case TypeLeafKind::LF_MFUNC_ID:
case TypeLeafKind::LF_STRING_ID:
@@ -181,19 +138,10 @@ Error TypeStreamMerger::visitTypeBegin(CVType &Rec) {
case TypeLeafKind::LF_BUILDINFO:
case TypeLeafKind::LF_UDT_SRC_LINE:
case TypeLeafKind::LF_UDT_MOD_SRC_LINE:
- return writeIdRecord(R, Success);
+ return true;
default:
- return writeTypeRecord(R, Success);
+ return false;
}
- return Error::success();
-}
-
-Error TypeStreamMerger::visitTypeEnd(CVType &Rec) {
- ++CurIndex;
- if (!IsSecondPass)
- assert(IndexMap.size() == slotForIndex(CurIndex) &&
- "visitKnownRecord should add one index map entry");
- return Error::success();
}
void TypeStreamMerger::addMapping(TypeIndex Idx) {
@@ -256,7 +204,7 @@ bool TypeStreamMerger::remapItemIndex(TypeIndex &Idx) {
}
Error TypeStreamMerger::mergeTypeRecords(TypeTableBuilder &Dest,
- const CVTypeArray &Types) {
+ const CVTypeArray &Types) {
DestTypeStream = &Dest;
return doit(Types);
@@ -264,7 +212,7 @@ Error TypeStreamMerger::mergeTypeRecords(TypeTableBuilder &Dest,
Error TypeStreamMerger::mergeIdRecords(TypeTableBuilder &Dest,
ArrayRef<TypeIndex> TypeSourceToDest,
- const CVTypeArray &Ids) {
+ const CVTypeArray &Ids) {
DestIdStream = &Dest;
TypeLookup = TypeSourceToDest;
@@ -273,25 +221,14 @@ Error TypeStreamMerger::mergeIdRecords(TypeTableBuilder &Dest,
Error TypeStreamMerger::mergeTypesAndIds(TypeTableBuilder &DestIds,
TypeTableBuilder &DestTypes,
- const CVTypeArray &IdsAndTypes) {
+ const CVTypeArray &IdsAndTypes) {
DestIdStream = &DestIds;
DestTypeStream = &DestTypes;
-
return doit(IdsAndTypes);
}
Error TypeStreamMerger::doit(const CVTypeArray &Types) {
- LastError = Error::success();
-
- // We don't want to deserialize records. I guess this flag is poorly named,
- // but it really means "Don't deserialize records before switching on the
- // concrete type.
- // FIXME: We can probably get even more speed here if we don't use the visitor
- // pipeline here, but instead write the switch ourselves. I don't think it
- // would buy us much since it's already pretty fast, but it's probably worth
- // a few cycles.
- if (auto EC =
- codeview::visitTypeStream(Types, *this, VDS_BytesExternal, Handler))
+ if (auto EC = remapAllTypes(Types))
return EC;
// If we found bad indices but no other errors, try doing another pass and see
@@ -301,50 +238,92 @@ Error TypeStreamMerger::doit(const CVTypeArray &Types) {
// topologically sorted. The standard library contains MASM-produced objects,
// so this is important to handle correctly, but we don't have to be too
// efficient. MASM type streams are usually very small.
- while (!*LastError && NumBadIndices > 0) {
+ while (!LastError && NumBadIndices > 0) {
unsigned BadIndicesRemaining = NumBadIndices;
IsSecondPass = true;
NumBadIndices = 0;
CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex);
- if (auto EC =
- codeview::visitTypeStream(Types, *this, VDS_BytesExternal, Handler))
+ if (auto EC = remapAllTypes(Types))
return EC;
assert(NumBadIndices <= BadIndicesRemaining &&
"second pass found more bad indices");
- if (!*LastError && NumBadIndices == BadIndicesRemaining) {
+ if (!LastError && NumBadIndices == BadIndicesRemaining) {
return llvm::make_error<CodeViewError>(
cv_error_code::corrupt_record, "input type graph contains cycles");
}
}
- Error Ret = std::move(*LastError);
- LastError.reset();
- return Ret;
+ if (LastError)
+ return std::move(*LastError);
+ return Error::success();
+}
+
+Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) {
+ for (const CVType &Type : Types)
+ if (auto EC = remapType(Type))
+ return EC;
+ return Error::success();
+}
+
+Error TypeStreamMerger::remapType(const CVType &Type) {
+ RemappedType R(Type);
+ SmallVector<TiReference, 32> Refs;
+ discoverTypeIndices(Type.RecordData, Refs);
+ bool MappedAllIndices = remapIndices(R, Refs);
+ TypeTableBuilder &Dest =
+ isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
+ if (auto EC = writeRecord(Dest, R, MappedAllIndices))
+ return EC;
+
+ ++CurIndex;
+ assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) &&
+ "visitKnownRecord should add one index map entry");
+ return Error::success();
+}
+
+bool TypeStreamMerger::remapIndices(RemappedType &Record,
+ ArrayRef<TiReference> Refs) {
+ ArrayRef<uint8_t> OriginalData = Record.OriginalRecord.content();
+ bool Success = true;
+ for (auto &Ref : Refs) {
+ uint32_t Offset = Ref.Offset;
+ ArrayRef<uint8_t> Bytes = OriginalData.slice(Ref.Offset, sizeof(TypeIndex));
+ ArrayRef<TypeIndex> TIs(reinterpret_cast<const TypeIndex *>(Bytes.data()),
+ Ref.Count);
+ for (auto TI : TIs) {
+ TypeIndex NewTI = TI;
+ bool ThisSuccess = (Ref.Kind == TiRefKind::IndexRef)
+ ? remapItemIndex(NewTI)
+ : remapTypeIndex(NewTI);
+ if (ThisSuccess && NewTI != TI)
+ Record.Mappings.emplace_back(Offset, NewTI);
+ Offset += sizeof(TypeIndex);
+ Success &= ThisSuccess;
+ }
+ }
+ return Success;
}
Error llvm::codeview::mergeTypeRecords(TypeTableBuilder &Dest,
SmallVectorImpl<TypeIndex> &SourceToDest,
- TypeServerHandler *Handler,
- const CVTypeArray &Types) {
- TypeStreamMerger M(SourceToDest, Handler);
+ const CVTypeArray &Types) {
+ TypeStreamMerger M(SourceToDest);
return M.mergeTypeRecords(Dest, Types);
}
Error llvm::codeview::mergeIdRecords(TypeTableBuilder &Dest,
ArrayRef<TypeIndex> TypeSourceToDest,
SmallVectorImpl<TypeIndex> &SourceToDest,
- const CVTypeArray &Ids) {
- TypeStreamMerger M(SourceToDest, nullptr);
+ const CVTypeArray &Ids) {
+ TypeStreamMerger M(SourceToDest);
return M.mergeIdRecords(Dest, TypeSourceToDest, Ids);
}
Error llvm::codeview::mergeTypeAndIdRecords(
TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes,
- SmallVectorImpl<TypeIndex> &SourceToDest, TypeServerHandler *Handler,
- const CVTypeArray &IdsAndTypes) {
-
- TypeStreamMerger M(SourceToDest, Handler);
+ SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes) {
+ TypeStreamMerger M(SourceToDest);
return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes);
}
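
As an illustrative aside (not part of the patch): remapIndices() above walks the discovered TiReferences and rewrites each embedded 32-bit type index through the source-to-destination map, recording failures so a second pass can retry once more of the map is populated. A simplified, self-contained version of that patching step (names and types are illustrative, not the CodeView API):

    // Patch a 32-bit type index stored at a byte offset inside a record,
    // looking it up in a source-to-destination map.
    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    static bool remapAt(std::vector<uint8_t> &Record, uint32_t Offset,
                        const std::vector<uint32_t> &SourceToDest) {
      uint32_t OldTI;
      std::memcpy(&OldTI, Record.data() + Offset, sizeof(OldTI));
      if (OldTI >= SourceToDest.size())
        return false; // bad index: caller may retry on a second pass
      uint32_t NewTI = SourceToDest[OldTI];
      std::memcpy(Record.data() + Offset, &NewTI, sizeof(NewTI));
      return true;
    }

    int main() {
      std::vector<uint8_t> Record(8, 0);
      uint32_t TI = 1;
      std::memcpy(Record.data() + 4, &TI, sizeof(TI)); // index stored at offset 4
      std::vector<uint32_t> Map = {7, 9};              // source index 1 -> dest 9
      assert(remapAt(Record, 4, Map));
      std::memcpy(&TI, Record.data() + 4, sizeof(TI));
      assert(TI == 9);
    }
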
diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 0a10e6b78911..6cf44ffa3796 100644
--- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -24,22 +24,166 @@ using namespace llvm;
using namespace dwarf;
using namespace object;
-void DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
- DWARFAttribute &AttrValue) {
+bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
+ uint32_t *Offset, unsigned UnitIndex,
+ uint8_t &UnitType, bool &isUnitDWARF64) {
+ uint32_t AbbrOffset, Length;
+ uint8_t AddrSize = 0;
+ uint16_t Version;
+ bool Success = true;
+
+ bool ValidLength = false;
+ bool ValidVersion = false;
+ bool ValidAddrSize = false;
+ bool ValidType = true;
+ bool ValidAbbrevOffset = true;
+
+ uint32_t OffsetStart = *Offset;
+ Length = DebugInfoData.getU32(Offset);
+ if (Length == UINT32_MAX) {
+ isUnitDWARF64 = true;
+ OS << format(
+ "Unit[%d] is in 64-bit DWARF format; cannot verify from this point.\n",
+ UnitIndex);
+ return false;
+ }
+ Version = DebugInfoData.getU16(Offset);
+
+ if (Version >= 5) {
+ UnitType = DebugInfoData.getU8(Offset);
+ AddrSize = DebugInfoData.getU8(Offset);
+ AbbrOffset = DebugInfoData.getU32(Offset);
+ ValidType = DWARFUnit::isValidUnitType(UnitType);
+ } else {
+ UnitType = 0;
+ AbbrOffset = DebugInfoData.getU32(Offset);
+ AddrSize = DebugInfoData.getU8(Offset);
+ }
+
+ if (!DCtx.getDebugAbbrev()->getAbbreviationDeclarationSet(AbbrOffset))
+ ValidAbbrevOffset = false;
+
+ ValidLength = DebugInfoData.isValidOffset(OffsetStart + Length + 3);
+ ValidVersion = DWARFContext::isSupportedVersion(Version);
+ ValidAddrSize = AddrSize == 4 || AddrSize == 8;
+ if (!ValidLength || !ValidVersion || !ValidAddrSize || !ValidAbbrevOffset ||
+ !ValidType) {
+ Success = false;
+ OS << format("Units[%d] - start offset: 0x%08x \n", UnitIndex, OffsetStart);
+ if (!ValidLength)
+ OS << "\tError: The length for this unit is too "
+ "large for the .debug_info provided.\n";
+ if (!ValidVersion)
+ OS << "\tError: The 16 bit unit header version is not valid.\n";
+ if (!ValidType)
+ OS << "\tError: The unit type encoding is not valid.\n";
+ if (!ValidAbbrevOffset)
+ OS << "\tError: The offset into the .debug_abbrev section is "
+ "not valid.\n";
+ if (!ValidAddrSize)
+ OS << "\tError: The address size is unsupported.\n";
+ }
+ *Offset = OffsetStart + Length + 4;
+ return Success;
+}
+
+bool DWARFVerifier::verifyUnitContents(DWARFUnit Unit) {
+ uint32_t NumUnitErrors = 0;
+ unsigned NumDies = Unit.getNumDIEs();
+ for (unsigned I = 0; I < NumDies; ++I) {
+ auto Die = Unit.getDIEAtIndex(I);
+ if (Die.getTag() == DW_TAG_null)
+ continue;
+ for (auto AttrValue : Die.attributes()) {
+ NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue);
+ NumUnitErrors += verifyDebugInfoForm(Die, AttrValue);
+ }
+ }
+ return NumUnitErrors == 0;
+}
+
+bool DWARFVerifier::handleDebugInfo() {
+ OS << "Verifying .debug_info Unit Header Chain...\n";
+
+ DWARFDataExtractor DebugInfoData(DCtx.getInfoSection(), DCtx.isLittleEndian(),
+ 0);
+ uint32_t NumDebugInfoErrors = 0;
+ uint32_t OffsetStart = 0, Offset = 0, UnitIdx = 0;
+ uint8_t UnitType = 0;
+ bool isUnitDWARF64 = false;
+ bool isHeaderChainValid = true;
+ bool hasDIE = DebugInfoData.isValidOffset(Offset);
+ while (hasDIE) {
+ OffsetStart = Offset;
+ if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType,
+ isUnitDWARF64)) {
+ isHeaderChainValid = false;
+ if (isUnitDWARF64)
+ break;
+ } else {
+ std::unique_ptr<DWARFUnit> Unit;
+ switch (UnitType) {
+ case dwarf::DW_UT_type:
+ case dwarf::DW_UT_split_type: {
+ DWARFUnitSection<DWARFTypeUnit> TUSection{};
+ Unit.reset(new DWARFTypeUnit(
+ DCtx, DCtx.getInfoSection(), DCtx.getDebugAbbrev(),
+ &DCtx.getRangeSection(), DCtx.getStringSection(),
+ DCtx.getStringOffsetSection(), &DCtx.getAppleObjCSection(),
+ DCtx.getLineSection(), DCtx.isLittleEndian(), false, TUSection,
+ nullptr));
+ break;
+ }
+ case dwarf::DW_UT_skeleton:
+ case dwarf::DW_UT_split_compile:
+ case dwarf::DW_UT_compile:
+ case dwarf::DW_UT_partial:
+ // UnitType = 0 means that we are
+ // verifying a compile unit in DWARF v4.
+ case 0: {
+ DWARFUnitSection<DWARFCompileUnit> CUSection{};
+ Unit.reset(new DWARFCompileUnit(
+ DCtx, DCtx.getInfoSection(), DCtx.getDebugAbbrev(),
+ &DCtx.getRangeSection(), DCtx.getStringSection(),
+ DCtx.getStringOffsetSection(), &DCtx.getAppleObjCSection(),
+ DCtx.getLineSection(), DCtx.isLittleEndian(), false, CUSection,
+ nullptr));
+ break;
+ }
+ default: { llvm_unreachable("Invalid UnitType."); }
+ }
+ Unit->extract(DebugInfoData, &OffsetStart);
+ if (!verifyUnitContents(*Unit))
+ ++NumDebugInfoErrors;
+ }
+ hasDIE = DebugInfoData.isValidOffset(Offset);
+ ++UnitIdx;
+ }
+ if (UnitIdx == 0 && !hasDIE) {
+ OS << "Warning: .debug_info is empty.\n";
+ isHeaderChainValid = true;
+ }
+ NumDebugInfoErrors += verifyDebugInfoReferences();
+ return (isHeaderChainValid && NumDebugInfoErrors == 0);
+}
+
+unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
+ DWARFAttribute &AttrValue) {
+ unsigned NumErrors = 0;
const auto Attr = AttrValue.Attr;
switch (Attr) {
case DW_AT_ranges:
// Make sure the offset in the DW_AT_ranges attribute is valid.
if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
if (*SectionOffset >= DCtx.getRangeSection().Data.size()) {
- ++NumDebugInfoErrors;
+ ++NumErrors;
OS << "error: DW_AT_ranges offset is beyond .debug_ranges "
"bounds:\n";
Die.dump(OS, 0);
OS << "\n";
}
} else {
- ++NumDebugInfoErrors;
+ ++NumErrors;
OS << "error: DIE has invalid DW_AT_ranges encoding:\n";
Die.dump(OS, 0);
OS << "\n";
@@ -49,7 +193,7 @@ void DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
// Make sure the offset in the DW_AT_stmt_list attribute is valid.
if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
if (*SectionOffset >= DCtx.getLineSection().Data.size()) {
- ++NumDebugInfoErrors;
+ ++NumErrors;
OS << "error: DW_AT_stmt_list offset is beyond .debug_line "
"bounds: "
<< format("0x%08" PRIx32, *SectionOffset) << "\n";
@@ -57,7 +201,7 @@ void DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
OS << "\n";
}
} else {
- ++NumDebugInfoErrors;
+ ++NumErrors;
OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n";
Die.dump(OS, 0);
OS << "\n";
@@ -67,10 +211,12 @@ void DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
default:
break;
}
+ return NumErrors;
}
-void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
- DWARFAttribute &AttrValue) {
+unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
+ DWARFAttribute &AttrValue) {
+ unsigned NumErrors = 0;
const auto Form = AttrValue.Value.getForm();
switch (Form) {
case DW_FORM_ref1:
@@ -86,7 +232,7 @@ void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset();
auto CUOffset = AttrValue.Value.getRawUValue();
if (CUOffset >= CUSize) {
- ++NumDebugInfoErrors;
+ ++NumErrors;
OS << "error: " << FormEncodingString(Form) << " CU offset "
<< format("0x%08" PRIx32, CUOffset)
<< " is invalid (must be less than CU size of "
@@ -108,7 +254,7 @@ void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
assert(RefVal);
if (RefVal) {
if (*RefVal >= DCtx.getInfoSection().Data.size()) {
- ++NumDebugInfoErrors;
+ ++NumErrors;
OS << "error: DW_FORM_ref_addr offset beyond .debug_info "
"bounds:\n";
Die.dump(OS, 0);
@@ -125,7 +271,7 @@ void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
auto SecOffset = AttrValue.Value.getAsSectionOffset();
assert(SecOffset); // DW_FORM_strp is a section offset.
if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) {
- ++NumDebugInfoErrors;
+ ++NumErrors;
OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n";
Die.dump(OS, 0);
OS << "\n";
@@ -135,17 +281,19 @@ void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
default:
break;
}
+ return NumErrors;
}
-void DWARFVerifier::verifyDebugInfoReferences() {
+unsigned DWARFVerifier::verifyDebugInfoReferences() {
// Take all references and make sure they point to an actual DIE by
// getting the DIE by offset and emitting an error
OS << "Verifying .debug_info references...\n";
+ unsigned NumErrors = 0;
for (auto Pair : ReferenceToDIEOffsets) {
auto Die = DCtx.getDIEForOffset(Pair.first);
if (Die)
continue;
- ++NumDebugInfoErrors;
+ ++NumErrors;
OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
<< ". Offset is in between DIEs:\n";
for (auto Offset : Pair.second) {
@@ -155,26 +303,7 @@ void DWARFVerifier::verifyDebugInfoReferences() {
}
OS << "\n";
}
-}
-
-bool DWARFVerifier::handleDebugInfo() {
- NumDebugInfoErrors = 0;
- OS << "Verifying .debug_info...\n";
- for (const auto &CU : DCtx.compile_units()) {
- unsigned NumDies = CU->getNumDIEs();
- for (unsigned I = 0; I < NumDies; ++I) {
- auto Die = CU->getDIEAtIndex(I);
- const auto Tag = Die.getTag();
- if (Tag == DW_TAG_null)
- continue;
- for (auto AttrValue : Die.attributes()) {
- verifyDebugInfoAttribute(Die, AttrValue);
- verifyDebugInfoForm(Die, AttrValue);
- }
- }
- }
- verifyDebugInfoReferences();
- return NumDebugInfoErrors == 0;
+ return NumErrors;
}
void DWARFVerifier::verifyDebugLineStmtOffsets() {
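
As an illustrative aside (not part of the patch): verifyUnitHeader() above reads the DWARF32 unit header and checks its length, version, abbreviation offset, and address size. A minimal parse of the pre-v5 layout it expects (unit length, version, abbrev offset, address size); this assumes a little-endian host and is not the LLVM DataExtractor API:

    // Parse the fields checked by verifyUnitHeader() for a DWARF v4 unit.
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    struct UnitHeader {
      uint32_t Length;
      uint16_t Version;
      uint32_t AbbrOffset;
      uint8_t AddrSize;
    };

    static UnitHeader parseV4Header(const uint8_t *Data) {
      UnitHeader H;
      std::memcpy(&H.Length, Data + 0, 4);     // unit length (not 0xffffffff,
                                               // i.e. not 64-bit DWARF)
      std::memcpy(&H.Version, Data + 4, 2);    // version
      std::memcpy(&H.AbbrOffset, Data + 6, 4); // .debug_abbrev offset
      H.AddrSize = Data[10];                   // address size
      return H;
    }

    int main() {
      const uint8_t Raw[] = {0x2A, 0, 0, 0, 4, 0, 0, 0, 0, 0, 8};
      UnitHeader H = parseV4Header(Raw);
      assert(H.Length == 0x2A && H.Version == 4 && H.AddrSize == 8);
      // The verifier additionally checks AddrSize == 4 || AddrSize == 8 and
      // that Length stays inside .debug_info, as in the hunk above.
    }
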
diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt
index ff01c948e099..9b1f37943e67 100644
--- a/lib/DebugInfo/PDB/CMakeLists.txt
+++ b/lib/DebugInfo/PDB/CMakeLists.txt
@@ -52,7 +52,6 @@ add_pdb_impl_folder(Native
Native/PDBFileBuilder.cpp
Native/PDBStringTable.cpp
Native/PDBStringTableBuilder.cpp
- Native/PDBTypeServerHandler.cpp
Native/PublicsStream.cpp
Native/PublicsStreamBuilder.cpp
Native/RawError.cpp
diff --git a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
index 0b48a366bd24..4c59d2f2a9d9 100644
--- a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
@@ -125,16 +125,16 @@ PrivateGetDIAValue(IDiaSymbol *Symbol,
return Result8;
}
-PDB_UniqueId
+codeview::GUID
PrivateGetDIAValue(IDiaSymbol *Symbol,
HRESULT (__stdcall IDiaSymbol::*Method)(GUID *)) {
GUID Result;
if (S_OK != (Symbol->*Method)(&Result))
- return PDB_UniqueId();
+ return codeview::GUID();
- static_assert(sizeof(PDB_UniqueId) == sizeof(GUID),
- "PDB_UniqueId is the wrong size!");
- PDB_UniqueId IdResult;
+ static_assert(sizeof(codeview::GUID) == sizeof(GUID),
+ "GUID is the wrong size!");
+ codeview::GUID IdResult;
::memcpy(&IdResult, &Result, sizeof(GUID));
return IdResult;
}
@@ -746,7 +746,7 @@ PDB_SymType DIARawSymbol::getSymTag() const {
&IDiaSymbol::get_symTag);
}
-PDB_UniqueId DIARawSymbol::getGuid() const {
+codeview::GUID DIARawSymbol::getGuid() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_guid);
}
diff --git a/lib/DebugInfo/PDB/GenericError.cpp b/lib/DebugInfo/PDB/GenericError.cpp
index 789f3b813170..4fcecb92fd15 100644
--- a/lib/DebugInfo/PDB/GenericError.cpp
+++ b/lib/DebugInfo/PDB/GenericError.cpp
@@ -26,6 +26,8 @@ public:
switch (static_cast<generic_error_code>(Condition)) {
case generic_error_code::unspecified:
return "An unknown error has occurred.";
+ case generic_error_code::type_server_not_found:
+ return "Type server PDB was not found.";
case generic_error_code::dia_sdk_not_present:
return "LLVM was not compiled with support for DIA. This usually means "
"that you are are not using MSVC, or your Visual Studio "
diff --git a/lib/DebugInfo/PDB/Native/InfoStream.cpp b/lib/DebugInfo/PDB/Native/InfoStream.cpp
index 21b66b3e7bcf..829879060c33 100644
--- a/lib/DebugInfo/PDB/Native/InfoStream.cpp
+++ b/lib/DebugInfo/PDB/Native/InfoStream.cpp
@@ -118,7 +118,7 @@ uint32_t InfoStream::getSignature() const { return Signature; }
uint32_t InfoStream::getAge() const { return Age; }
-PDB_UniqueId InfoStream::getGuid() const { return Guid; }
+GUID InfoStream::getGuid() const { return Guid; }
uint32_t InfoStream::getNamedStreamMapByteSize() const {
return NamedStreamMapByteSize;
diff --git a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
index 707128f7efd4..6450ae752f96 100644
--- a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
@@ -34,7 +34,7 @@ void InfoStreamBuilder::setSignature(uint32_t S) { Sig = S; }
void InfoStreamBuilder::setAge(uint32_t A) { Age = A; }
-void InfoStreamBuilder::setGuid(PDB_UniqueId G) { Guid = G; }
+void InfoStreamBuilder::setGuid(GUID G) { Guid = G; }
void InfoStreamBuilder::addFeature(PdbRaw_FeatureSig Sig) {
Features.push_back(Sig);
diff --git a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
index cb0830f453c8..3241000b06db 100644
--- a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
@@ -56,12 +56,12 @@ std::string NativeExeSymbol::getSymbolsFileName() const {
return File.getFilePath();
}
-PDB_UniqueId NativeExeSymbol::getGuid() const {
+codeview::GUID NativeExeSymbol::getGuid() const {
auto IS = File.getPDBInfoStream();
if (IS)
return IS->getGuid();
consumeError(IS.takeError());
- return PDB_UniqueId{{0}};
+ return codeview::GUID{{0}};
}
bool NativeExeSymbol::hasCTypes() const {
diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index 92612bcea4ac..df3f418052a9 100644
--- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -323,9 +323,7 @@ PDB_SymType NativeRawSymbol::getSymTag() const {
return PDB_SymType::None;
}
-PDB_UniqueId NativeRawSymbol::getGuid() const {
- return PDB_UniqueId{{0}};
-}
+codeview::GUID NativeRawSymbol::getGuid() const { return codeview::GUID{{0}}; }
int32_t NativeRawSymbol::getOffset() const {
return 0;
diff --git a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
deleted file mode 100644
index 9fd90102f72c..000000000000
--- a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-//===- PDBTypeServerHandler.cpp ---------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Handles CodeView LF_TYPESERVER2 records by attempting to locate a matching
-// PDB file, then loading the PDB file and visiting all types from the
-// referenced PDB using the original supplied visitor.
-//
-// The net effect of this is that when visiting a PDB containing a TypeServer
-// record, the TypeServer record is "replaced" with all of the records in
-// the referenced PDB file. If a single instance of PDBTypeServerHandler
-// encounters the same TypeServer multiple times (for example reusing one
-// PDBTypeServerHandler across multiple visitations of distinct object files or
-// PDB files), PDBTypeServerHandler will optionally revisit all the records
-// again, or simply consume the record and do nothing.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h"
-
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/PDB/GenericError.h"
-#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
-#include "llvm/DebugInfo/PDB/PDB.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Path.h"
-
-using namespace llvm;
-using namespace llvm::codeview;
-using namespace llvm::pdb;
-
-static void ignoreErrors(Error EC) {
- llvm::handleAllErrors(std::move(EC), [&](ErrorInfoBase &EIB) {});
-}
-
-PDBTypeServerHandler::PDBTypeServerHandler(bool RevisitAlways)
- : RevisitAlways(RevisitAlways) {}
-
-void PDBTypeServerHandler::addSearchPath(StringRef Path) {
- if (Path.empty() || !sys::fs::is_directory(Path))
- return;
-
- SearchPaths.insert(Path);
-}
-
-Expected<bool>
-PDBTypeServerHandler::handleInternal(PDBFile &File,
- TypeVisitorCallbacks &Callbacks) {
- auto ExpectedTpi = File.getPDBTpiStream();
- if (!ExpectedTpi)
- return ExpectedTpi.takeError();
-
- // For handling a type server, we should be using whatever the callback array
- // was
- // that is being used for the original file. We shouldn't allow the visitor
- // to
- // arbitrarily stick a deserializer in there.
- if (auto EC = codeview::visitTypeStream(ExpectedTpi->typeArray(), Callbacks,
- VDS_BytesExternal))
- return std::move(EC);
-
- return true;
-}
-
-Expected<bool> PDBTypeServerHandler::handle(TypeServer2Record &TS,
- TypeVisitorCallbacks &Callbacks) {
- if (Session) {
- // If we've already handled this TypeServer and we only want to handle each
- // TypeServer once, consume the record without doing anything.
- if (!RevisitAlways)
- return true;
-
- return handleInternal(Session->getPDBFile(), Callbacks);
- }
-
- StringRef File = sys::path::filename(TS.Name);
- if (File.empty())
- return make_error<CodeViewError>(
- cv_error_code::corrupt_record,
- "TypeServer2Record does not contain filename!");
-
- for (auto &Path : SearchPaths) {
- SmallString<64> PathStr = Path.getKey();
- sys::path::append(PathStr, File);
- if (!sys::fs::exists(PathStr))
- continue;
-
- std::unique_ptr<IPDBSession> ThisSession;
- if (auto EC = loadDataForPDB(PDB_ReaderType::Native, PathStr, ThisSession)) {
- // It is not an error if this PDB fails to load, it just means that it
- // doesn't match and we should continue searching.
- ignoreErrors(std::move(EC));
- continue;
- }
-
- std::unique_ptr<NativeSession> NS(
- static_cast<NativeSession *>(ThisSession.release()));
- PDBFile &File = NS->getPDBFile();
- auto ExpectedInfo = File.getPDBInfoStream();
- // All PDB Files should have an Info stream.
- if (!ExpectedInfo)
- return ExpectedInfo.takeError();
-
- // Just because a file with a matching name was found and it was an actual
- // PDB file doesn't mean it matches. For it to match the InfoStream's GUID
- // must match the GUID specified in the TypeServer2 record.
- ArrayRef<uint8_t> GuidBytes(ExpectedInfo->getGuid().Guid);
- StringRef GuidStr(reinterpret_cast<const char *>(GuidBytes.begin()),
- GuidBytes.size());
- if (GuidStr != TS.Guid)
- continue;
-
- Session = std::move(NS);
- return handleInternal(File, Callbacks);
- }
-
- // We couldn't find a matching PDB, so let it be handled by someone else.
- return false;
-}
diff --git a/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/lib/DebugInfo/PDB/Native/TpiHashing.cpp
index 91b8d648fcf9..77a2d57a8369 100644
--- a/lib/DebugInfo/PDB/Native/TpiHashing.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiHashing.cpp
@@ -11,101 +11,79 @@
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
-#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/Support/JamCRC.h"
using namespace llvm;
using namespace llvm::codeview;
using namespace llvm::pdb;
// Corresponds to `fUDTAnon`.
-template <typename T> static bool isAnonymous(T &Rec) {
- StringRef Name = Rec.getName();
+static bool isAnonymous(StringRef Name) {
return Name == "<unnamed-tag>" || Name == "__unnamed" ||
Name.endswith("::<unnamed-tag>") || Name.endswith("::__unnamed");
}
-// Computes a hash for a given TPI record.
-template <typename T>
-static uint32_t getTpiHash(T &Rec, ArrayRef<uint8_t> FullRecord) {
- auto Opts = static_cast<uint16_t>(Rec.getOptions());
-
- bool ForwardRef =
- Opts & static_cast<uint16_t>(ClassOptions::ForwardReference);
- bool Scoped = Opts & static_cast<uint16_t>(ClassOptions::Scoped);
- bool UniqueName = Opts & static_cast<uint16_t>(ClassOptions::HasUniqueName);
- bool IsAnon = UniqueName && isAnonymous(Rec);
+// Computes the hash for a user-defined type record. This could be a struct,
+// class, union, or enum.
+static uint32_t getHashForUdt(const TagRecord &Rec,
+ ArrayRef<uint8_t> FullRecord) {
+ ClassOptions Opts = Rec.getOptions();
+ bool ForwardRef = bool(Opts & ClassOptions::ForwardReference);
+ bool Scoped = bool(Opts & ClassOptions::Scoped);
+ bool HasUniqueName = bool(Opts & ClassOptions::HasUniqueName);
+ bool IsAnon = HasUniqueName && isAnonymous(Rec.getName());
if (!ForwardRef && !Scoped && !IsAnon)
return hashStringV1(Rec.getName());
- if (!ForwardRef && UniqueName && !IsAnon)
+ if (!ForwardRef && HasUniqueName && !IsAnon)
return hashStringV1(Rec.getUniqueName());
return hashBufferV8(FullRecord);
}
-template <typename T> static uint32_t getSourceLineHash(T &Rec) {
- char Buf[4];
- support::endian::write32le(Buf, Rec.getUDT().getIndex());
- return hashStringV1(StringRef(Buf, 4));
-}
-
-void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR,
- UdtSourceLineRecord &Rec) {
- CVR.Hash = getSourceLineHash(Rec);
-}
-
-void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR,
- UdtModSourceLineRecord &Rec) {
- CVR.Hash = getSourceLineHash(Rec);
-}
-
-void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, ClassRecord &Rec) {
- CVR.Hash = getTpiHash(Rec, CVR.data());
-}
-
-void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, EnumRecord &Rec) {
- CVR.Hash = getTpiHash(Rec, CVR.data());
-}
-
-void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, UnionRecord &Rec) {
- CVR.Hash = getTpiHash(Rec, CVR.data());
-}
-
-Error TpiHashVerifier::visitKnownRecord(CVType &CVR, UdtSourceLineRecord &Rec) {
- return verifySourceLine(Rec.getUDT());
-}
-
-Error TpiHashVerifier::visitKnownRecord(CVType &CVR,
- UdtModSourceLineRecord &Rec) {
- return verifySourceLine(Rec.getUDT());
-}
-
-Error TpiHashVerifier::visitKnownRecord(CVType &CVR, ClassRecord &Rec) {
- if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index])
- return errorInvalidHash();
- return Error::success();
-}
-Error TpiHashVerifier::visitKnownRecord(CVType &CVR, EnumRecord &Rec) {
- if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index])
- return errorInvalidHash();
- return Error::success();
-}
-Error TpiHashVerifier::visitKnownRecord(CVType &CVR, UnionRecord &Rec) {
- if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index])
- return errorInvalidHash();
- return Error::success();
+template <typename T>
+static Expected<uint32_t> getHashForUdt(const CVType &Rec) {
+ T Deserialized;
+ if (auto E = TypeDeserializer::deserializeAs(const_cast<CVType &>(Rec),
+ Deserialized))
+ return std::move(E);
+ return getHashForUdt(Deserialized, Rec.data());
}
-Error TpiHashVerifier::verifySourceLine(codeview::TypeIndex TI) {
+template <typename T>
+static Expected<uint32_t> getSourceLineHash(const CVType &Rec) {
+ T Deserialized;
+ if (auto E = TypeDeserializer::deserializeAs(const_cast<CVType &>(Rec),
+ Deserialized))
+ return std::move(E);
char Buf[4];
- support::endian::write32le(Buf, TI.getIndex());
- uint32_t Hash = hashStringV1(StringRef(Buf, 4));
- if (Hash % NumHashBuckets != HashValues[Index])
- return errorInvalidHash();
- return Error::success();
+ support::endian::write32le(Buf, Deserialized.getUDT().getIndex());
+ return hashStringV1(StringRef(Buf, 4));
}
-Error TpiHashVerifier::visitTypeBegin(CVType &Rec) {
- ++Index;
- RawRecord = Rec;
- return Error::success();
+Expected<uint32_t> llvm::pdb::hashTypeRecord(const CVType &Rec) {
+ switch (Rec.kind()) {
+ case LF_CLASS:
+ case LF_STRUCTURE:
+ case LF_INTERFACE:
+ return getHashForUdt<ClassRecord>(Rec);
+ case LF_UNION:
+ return getHashForUdt<UnionRecord>(Rec);
+ case LF_ENUM:
+ return getHashForUdt<EnumRecord>(Rec);
+
+ case LF_UDT_SRC_LINE:
+ return getSourceLineHash<UdtSourceLineRecord>(Rec);
+ case LF_UDT_MOD_SRC_LINE:
+ return getSourceLineHash<UdtModSourceLineRecord>(Rec);
+
+ default:
+ break;
+ }
+
+ // Run CRC32 over the bytes. This corresponds to `hashBufv8`.
+ JamCRC JC(/*Init=*/0U);
+ ArrayRef<char> Bytes(reinterpret_cast<const char *>(Rec.data().data()),
+ Rec.data().size());
+ JC.update(Bytes);
+ return JC.getCRC();
}
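
As an illustrative aside (not part of the patch): hashTypeRecord() above chooses the hash source by record kind — non-forward-reference named UDTs hash their (unique) name, source-line records hash the referenced type index, and everything else falls back to a CRC of the raw record bytes. A sketch of that dispatch policy; the hash functions below are stand-ins, not hashStringV1/hashBufferV8:

    // Choose what to hash for a type record: the name when it is a stable
    // identifier, otherwise the full record bytes.
    #include <cstdint>
    #include <functional>
    #include <string>
    #include <vector>

    enum class Kind { Class, Union, Enum, Other };

    static uint32_t hashRecord(Kind K, bool ForwardRef, const std::string &Name,
                               const std::vector<uint8_t> &FullRecord) {
      if ((K == Kind::Class || K == Kind::Union || K == Kind::Enum) &&
          !ForwardRef && !Name.empty())
        return uint32_t(std::hash<std::string>{}(Name)); // hash by name
      return uint32_t(std::hash<std::string>{}(
          std::string(FullRecord.begin(), FullRecord.end()))); // hash raw bytes
    }

    int main() {
      std::vector<uint8_t> Bytes = {1, 2, 3};
      (void)hashRecord(Kind::Class, /*ForwardRef=*/false, "Foo", Bytes);
      (void)hashRecord(Kind::Other, false, "", Bytes);
    }
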
diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp
index f917ef91f639..d3ef87d9009d 100644
--- a/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -14,7 +14,6 @@
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
diff --git a/lib/DebugInfo/PDB/PDBExtras.cpp b/lib/DebugInfo/PDB/PDBExtras.cpp
index faf1142ddf17..c291185bc67a 100644
--- a/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -260,12 +260,6 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
return OS;
}
-raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_UniqueId &Guid) {
- codeview::detail::GuidAdapter A(Guid.Guid);
- A.format(OS, "");
- return OS;
-}
-
raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_UdtType &Type) {
switch (Type) {
CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Class, "class", OS)
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index adca0eeb08b4..43461de4c491 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -288,7 +288,6 @@ private:
HalfDiffKindBits);
addRelocationForSection(R, SectionAID);
- addRelocationForSection(R, SectionBID);
return ++RelI;
}
diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt
index fa743c280e86..bc744890b997 100644
--- a/lib/Fuzzer/CMakeLists.txt
+++ b/lib/Fuzzer/CMakeLists.txt
@@ -46,7 +46,6 @@ if ( LLVM_USE_SANITIZE_COVERAGE OR CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux" )
FuzzerShmemPosix.cpp
FuzzerShmemWindows.cpp
FuzzerTracePC.cpp
- FuzzerTraceState.cpp
FuzzerUtil.cpp
FuzzerUtilDarwin.cpp
FuzzerUtilLinux.cpp
diff --git a/lib/Fuzzer/FuzzerCorpus.h b/lib/Fuzzer/FuzzerCorpus.h
index 218ae5b6ac4d..bae0aea78f13 100644
--- a/lib/Fuzzer/FuzzerCorpus.h
+++ b/lib/Fuzzer/FuzzerCorpus.h
@@ -34,7 +34,8 @@ struct InputInfo {
size_t NumExecutedMutations = 0;
size_t NumSuccessfullMutations = 0;
bool MayDeleteFile = false;
- std::vector<uint32_t> FeatureSet;
+ bool Reduced = false;
+ std::vector<uint32_t> UniqFeatureSet;
};
class InputCorpus {
@@ -79,7 +80,8 @@ class InputCorpus {
II.U = U;
II.NumFeatures = NumFeatures;
II.MayDeleteFile = MayDeleteFile;
- II.FeatureSet = FeatureSet;
+ II.UniqFeatureSet = FeatureSet;
+ std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end());
ComputeSHA1(U.data(), U.size(), II.Sha1);
Hashes.insert(Sha1ToString(II.Sha1));
UpdateCorpusDistribution();
@@ -117,34 +119,21 @@ class InputCorpus {
Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size());
PrintUnit(II->U);
Printf(" ");
- PrintFeatureSet(II->FeatureSet);
+ PrintFeatureSet(II->UniqFeatureSet);
Printf("\n");
}
i++;
}
}
- // If FeatureSet is that same as in II, replace II->U with {Data,Size}.
- bool TryToReplace(InputInfo *II, const uint8_t *Data, size_t Size,
- const std::vector<uint32_t> &FeatureSet) {
- if (II->U.size() > Size && II->FeatureSet.size() &&
- II->FeatureSet == FeatureSet) {
- if (FeatureDebug)
- Printf("Replace: %zd => %zd\n", II->U.size(), Size);
- Replace(II, {Data, Data + Size});
- PrintCorpus();
- return true;
- }
- return false;
- }
-
void Replace(InputInfo *II, const Unit &U) {
- assert(II->U.size());
+ assert(II->U.size() > U.size());
Hashes.erase(Sha1ToString(II->Sha1));
DeleteFile(*II);
ComputeSHA1(U.data(), U.size(), II->Sha1);
Hashes.insert(Sha1ToString(II->Sha1));
II->U = U;
+ II->Reduced = true;
}
bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); }
@@ -198,7 +187,7 @@ class InputCorpus {
Printf("EVICTED %zd\n", Idx);
}
- void AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) {
+ bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) {
assert(NewSize);
Idx = Idx % kFeatureSetSize;
uint32_t OldSize = GetFeature(Idx);
@@ -218,8 +207,9 @@ class InputCorpus {
Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize);
SmallestElementPerFeature[Idx] = Inputs.size();
InputSizesPerFeature[Idx] = NewSize;
- CountingFeatures = true;
+ return true;
}
+ return false;
}
size_t NumFeatures() const { return NumAddedFeatures; }
@@ -238,7 +228,6 @@ private:
size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; }
void ValidateFeatureSet() {
- if (!CountingFeatures) return;
if (FeatureDebug)
PrintFeatureSet();
for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++)
@@ -256,14 +245,12 @@ private:
// Must be called whenever the corpus or unit weights are changed.
void UpdateCorpusDistribution() {
size_t N = Inputs.size();
+ assert(N);
Intervals.resize(N + 1);
Weights.resize(N);
std::iota(Intervals.begin(), Intervals.end(), 0);
- if (CountingFeatures)
- for (size_t i = 0; i < N; i++)
- Weights[i] = Inputs[i]->NumFeatures * (i + 1);
- else
- std::iota(Weights.begin(), Weights.end(), 1);
+ for (size_t i = 0; i < N; i++)
+ Weights[i] = Inputs[i]->NumFeatures * (i + 1);
CorpusDistribution = std::piecewise_constant_distribution<double>(
Intervals.begin(), Intervals.end(), Weights.begin());
}
@@ -275,7 +262,6 @@ private:
std::unordered_set<std::string> Hashes;
std::vector<InputInfo*> Inputs;
- bool CountingFeatures = false;
size_t NumAddedFeatures = 0;
size_t NumUpdatedFeatures = 0;
uint32_t InputSizesPerFeature[kFeatureSetSize];
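
With CountingFeatures gone, UpdateCorpusDistribution always weights input i by NumFeatures * (i + 1), so later, feature-rich inputs are picked more often. The standalone program below (hypothetical feature counts, not libFuzzer code) shows how std::piecewise_constant_distribution turns such weights into a biased index choice.

    #include <cstdio>
    #include <numeric>
    #include <random>
    #include <vector>

    int main() {
      std::vector<size_t> NumFeatures = {3, 1, 7, 2}; // hypothetical per-input feature counts
      size_t N = NumFeatures.size();
      std::vector<double> Intervals(N + 1), Weights(N);
      std::iota(Intervals.begin(), Intervals.end(), 0);
      for (size_t I = 0; I < N; ++I)
        Weights[I] = NumFeatures[I] * (I + 1); // newer, feature-rich inputs get more weight
      std::piecewise_constant_distribution<double> Dist(
          Intervals.begin(), Intervals.end(), Weights.begin());
      std::mt19937 Rng(0);
      std::vector<size_t> Hits(N, 0);
      for (int I = 0; I < 100000; ++I)
        Hits[static_cast<size_t>(Dist(Rng))]++;
      for (size_t I = 0; I < N; ++I)
        std::printf("input %zu picked %zu times\n", I, Hits[I]);
      return 0;
    }
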
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index 87968893853e..fd8cab38a7bb 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -186,7 +186,11 @@ static void ParseFlags(const std::vector<std::string> &Args) {
}
Inputs = new std::vector<std::string>;
for (size_t A = 1; A < Args.size(); A++) {
- if (ParseOneFlag(Args[A].c_str())) continue;
+ if (ParseOneFlag(Args[A].c_str())) {
+ if (Flags.ignore_remaining_args)
+ break;
+ continue;
+ }
Inputs->push_back(Args[A]);
}
}
@@ -356,16 +360,17 @@ int MinimizeCrashInput(const std::vector<std::string> &Args,
exit(1);
}
std::string InputFilePath = Inputs->at(0);
- std::string BaseCmd =
- CloneArgsWithoutX(Args, "minimize_crash", "exact_artifact_path");
- auto InputPos = BaseCmd.find(" " + InputFilePath + " ");
+ auto BaseCmd = SplitBefore(
+ "-ignore_remaining_args=1",
+ CloneArgsWithoutX(Args, "minimize_crash", "exact_artifact_path"));
+ auto InputPos = BaseCmd.first.find(" " + InputFilePath + " ");
assert(InputPos != std::string::npos);
- BaseCmd.erase(InputPos, InputFilePath.size() + 1);
+ BaseCmd.first.erase(InputPos, InputFilePath.size() + 1);
if (Flags.runs <= 0 && Flags.max_total_time == 0) {
Printf("INFO: you need to specify -runs=N or "
"-max_total_time=N with -minimize_crash=1\n"
"INFO: defaulting to -max_total_time=600\n");
- BaseCmd += " -max_total_time=600";
+ BaseCmd.first += " -max_total_time=600";
}
auto LogFilePath = DirPlusFile(
@@ -378,7 +383,8 @@ int MinimizeCrashInput(const std::vector<std::string> &Args,
Printf("CRASH_MIN: minimizing crash input: '%s' (%zd bytes)\n",
CurrentFilePath.c_str(), U.size());
- auto Cmd = BaseCmd + " " + CurrentFilePath + LogFileRedirect;
+ auto Cmd = BaseCmd.first + " " + CurrentFilePath + LogFileRedirect + " " +
+ BaseCmd.second;
Printf("CRASH_MIN: executing: %s\n", Cmd.c_str());
int ExitCode = ExecuteCommand(Cmd);
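
The driver now splits its own flags from a pass-through tail at -ignore_remaining_args=1. The fragment below is a simplified, hypothetical rendering of the ParseFlags loop change above: recognized flags are consumed as before, and hitting the marker stops flag processing so the remaining arguments are left for the target to interpret.

    #include <string>
    #include <vector>

    // Hypothetical stand-in for ParseOneFlag(): here anything starting with '-'
    // counts as a recognized flag.
    static bool LooksLikeFlag(const std::string &Arg) {
      return !Arg.empty() && Arg[0] == '-';
    }

    static std::vector<std::string>
    CollectInputs(const std::vector<std::string> &Args) {
      std::vector<std::string> Inputs;
      for (size_t A = 1; A < Args.size(); A++) {
        if (LooksLikeFlag(Args[A])) {
          if (Args[A] == "-ignore_remaining_args=1")
            break;                     // leave the rest of argv untouched
          continue;                    // flag consumed, nothing to record
        }
        Inputs.push_back(Args[A]);     // everything else is an input path
      }
      return Inputs;
    }
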
diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def
index 5e70cbad3cf1..526805705b20 100644
--- a/lib/Fuzzer/FuzzerFlags.def
+++ b/lib/Fuzzer/FuzzerFlags.def
@@ -121,6 +121,9 @@ FUZZER_FLAG_STRING(exit_on_src_pos, "Exit if a newly found PC originates"
FUZZER_FLAG_STRING(exit_on_item, "Exit if an item with a given sha1 sum"
" was added to the corpus. "
"Used primarily for testing libFuzzer itself.")
+FUZZER_FLAG_INT(ignore_remaining_args, 0, "If 1, ignore all arguments passed "
+ "after this one. Useful for fuzzers that need to do their own "
+ "argument parsing.")
FUZZER_FLAG_STRING(run_equivalence_server, "Experimental")
FUZZER_FLAG_STRING(use_equivalence_server, "Experimental")
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index a732f895375e..3fc3fe004cef 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -38,7 +38,6 @@ public:
void Loop();
void MinimizeCrashLoop(const Unit &U);
void ShuffleAndMinimize(UnitVector *V);
- void InitializeTraceState();
void RereadOutputCorpus(size_t MaxSize);
size_t secondsSinceProcessStartUp() {
@@ -100,19 +99,10 @@ private:
void WriteToOutputCorpus(const Unit &U);
void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0);
- void PrintStatusForNewUnit(const Unit &U);
+ void PrintStatusForNewUnit(const Unit &U, const char *Text);
void ShuffleCorpus(UnitVector *V);
void CheckExitOnSrcPosOrItem();
- // Trace-based fuzzing: we run a unit with some kind of tracing
- // enabled and record potentially useful mutations. Then
- // We apply these mutations one by one to the unit and run it again.
-
- // Start tracing; forget all previously proposed mutations.
- void StartTraceRecording();
- // Stop tracing.
- void StopTraceRecording();
-
static void StaticDeathCallback();
void DumpCurrentUnit(const char *Prefix);
void DeathCallback();
@@ -142,7 +132,7 @@ private:
size_t MaxInputLen = 0;
size_t MaxMutationLen = 0;
- std::vector<uint32_t> FeatureSetTmp;
+ std::vector<uint32_t> UniqFeatureSetTmp;
// Need to know our own thread.
static thread_local bool IsMyThread;
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 6816f3af8a6f..8ac7a847aef7 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -114,7 +114,6 @@ Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD,
: CB(CB), Corpus(Corpus), MD(MD), Options(Options) {
if (EF->__sanitizer_set_death_callback)
EF->__sanitizer_set_death_callback(StaticDeathCallback);
- InitializeTraceState();
assert(!F);
F = this;
TPC.ResetMaps();
@@ -403,22 +402,29 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
ExecuteCallback(Data, Size);
- FeatureSetTmp.clear();
+ UniqFeatureSetTmp.clear();
+ size_t FoundUniqFeaturesOfII = 0;
size_t NumUpdatesBefore = Corpus.NumFeatureUpdates();
TPC.CollectFeatures([&](size_t Feature) {
- Corpus.AddFeature(Feature, Size, Options.Shrink);
- if (Options.ReduceInputs)
- FeatureSetTmp.push_back(Feature);
+ if (Corpus.AddFeature(Feature, Size, Options.Shrink))
+ UniqFeatureSetTmp.push_back(Feature);
+ if (Options.ReduceInputs && II)
+ if (std::binary_search(II->UniqFeatureSet.begin(),
+ II->UniqFeatureSet.end(), Feature))
+ FoundUniqFeaturesOfII++;
});
PrintPulseAndReportSlowInput(Data, Size);
size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore;
if (NumNewFeatures) {
Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
- FeatureSetTmp);
+ UniqFeatureSetTmp);
CheckExitOnSrcPosOrItem();
return true;
}
- if (II && Corpus.TryToReplace(II, Data, Size, FeatureSetTmp)) {
+ if (II && FoundUniqFeaturesOfII &&
+ FoundUniqFeaturesOfII == II->UniqFeatureSet.size() &&
+ II->U.size() > Size) {
+ Corpus.Replace(II, {Data, Data + Size});
CheckExitOnSrcPosOrItem();
return true;
}
@@ -501,10 +507,10 @@ void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) {
Printf("Base64: %s\n", Base64(U).c_str());
}
-void Fuzzer::PrintStatusForNewUnit(const Unit &U) {
+void Fuzzer::PrintStatusForNewUnit(const Unit &U, const char *Text) {
if (!Options.PrintNEW)
return;
- PrintStats("NEW ", "");
+ PrintStats(Text, "");
if (Options.Verbosity) {
Printf(" L: %zd ", U.size());
MD.PrintMutationSequence();
@@ -515,7 +521,8 @@ void Fuzzer::PrintStatusForNewUnit(const Unit &U) {
void Fuzzer::ReportNewCoverage(InputInfo *II, const Unit &U) {
II->NumSuccessfullMutations++;
MD.RecordSuccessfulMutationSequence();
- PrintStatusForNewUnit(U);
+ PrintStatusForNewUnit(U, II->Reduced ? "REDUCE" :
+ "NEW ");
WriteToOutputCorpus(U);
NumberOfNewUnitsAdded++;
TPC.PrintNewPCs();
@@ -600,13 +607,10 @@ void Fuzzer::MutateAndTestOne() {
assert(NewSize > 0 && "Mutator returned empty unit");
assert(NewSize <= CurrentMaxMutationLen && "Mutator return overisized unit");
Size = NewSize;
- if (i == 0)
- StartTraceRecording();
II.NumExecutedMutations++;
if (RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II))
ReportNewCoverage(&II, {CurrentUnitData, CurrentUnitData + Size});
- StopTraceRecording();
TryDetectingAMemoryLeak(CurrentUnitData, Size,
/*DuringInitialCorpusExecution*/ false);
}
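
The RunOne() change above replaces TryToReplace() with an inline check: the candidate must rediscover every feature that is unique to the existing corpus item II and be strictly smaller. A self-contained sketch of that predicate, using a hypothetical InputInfoSketch type and assuming the candidate's feature list has no duplicates:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct InputInfoSketch {
      std::vector<uint8_t> U;               // the stored unit
      std::vector<uint32_t> UniqFeatureSet; // kept sorted, as in AddToCorpus()
    };

    static bool ShouldReplace(const InputInfoSketch &II,
                              const std::vector<uint32_t> &CandidateFeatures,
                              size_t CandidateSize) {
      size_t FoundUniqFeaturesOfII = 0;
      for (uint32_t F : CandidateFeatures)
        if (std::binary_search(II.UniqFeatureSet.begin(),
                               II.UniqFeatureSet.end(), F))
          FoundUniqFeaturesOfII++;
      return FoundUniqFeaturesOfII && // at least one, and in fact all, of II's features
             FoundUniqFeaturesOfII == II.UniqFeatureSet.size() &&
             II.U.size() > CandidateSize;
    }
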
diff --git a/lib/Fuzzer/FuzzerMerge.cpp b/lib/Fuzzer/FuzzerMerge.cpp
index 612f4bbb28f2..616c0999aa39 100644
--- a/lib/Fuzzer/FuzzerMerge.cpp
+++ b/lib/Fuzzer/FuzzerMerge.cpp
@@ -241,7 +241,6 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
return true;
});
// Show stats.
- TotalNumberOfRuns++;
if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)))
PrintStats("pulse ");
// Write the post-run marker and the coverage.
@@ -286,12 +285,13 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
// Execute the inner process untill it passes.
// Every inner process should execute at least one input.
- std::string BaseCmd = CloneArgsWithoutX(Args, "keep-all-flags");
+ auto BaseCmd = SplitBefore("-ignore_remaining_args=1",
+ CloneArgsWithoutX(Args, "keep-all-flags"));
bool Success = false;
for (size_t i = 1; i <= AllFiles.size(); i++) {
Printf("MERGE-OUTER: attempt %zd\n", i);
- auto ExitCode =
- ExecuteCommand(BaseCmd + " -merge_control_file=" + CFPath);
+ auto ExitCode = ExecuteCommand(BaseCmd.first + " -merge_control_file=" +
+ CFPath + " " + BaseCmd.second);
if (!ExitCode) {
Printf("MERGE-OUTER: succesfull in %zd attempt(s)\n", i);
Success = true;
diff --git a/lib/Fuzzer/FuzzerMutate.cpp b/lib/Fuzzer/FuzzerMutate.cpp
index 53cb9027e455..5998ef9d3193 100644
--- a/lib/Fuzzer/FuzzerMutate.cpp
+++ b/lib/Fuzzer/FuzzerMutate.cpp
@@ -43,8 +43,6 @@ MutationDispatcher::MutationDispatcher(Random &Rand,
{&MutationDispatcher::Mutate_CrossOver, "CrossOver"},
{&MutationDispatcher::Mutate_AddWordFromManualDictionary,
"ManualDict"},
- {&MutationDispatcher::Mutate_AddWordFromTemporaryAutoDictionary,
- "TempAutoDict"},
{&MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary,
"PersAutoDict"},
});
@@ -165,11 +163,6 @@ size_t MutationDispatcher::Mutate_AddWordFromManualDictionary(uint8_t *Data,
return AddWordFromDictionary(ManualDictionary, Data, Size, MaxSize);
}
-size_t MutationDispatcher::Mutate_AddWordFromTemporaryAutoDictionary(
- uint8_t *Data, size_t Size, size_t MaxSize) {
- return AddWordFromDictionary(TempAutoDictionary, Data, Size, MaxSize);
-}
-
size_t MutationDispatcher::ApplyDictionaryEntry(uint8_t *Data, size_t Size,
size_t MaxSize,
DictionaryEntry &DE) {
@@ -251,7 +244,7 @@ size_t MutationDispatcher::Mutate_AddWordFromTORC(
uint8_t *Data, size_t Size, size_t MaxSize) {
Word W;
DictionaryEntry DE;
- switch (Rand(3)) {
+ switch (Rand(4)) {
case 0: {
auto X = TPC.TORC8.Get(Rand.Rand());
DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size);
@@ -267,6 +260,10 @@ size_t MutationDispatcher::Mutate_AddWordFromTORC(
auto X = TPC.TORCW.Get(Rand.Rand());
DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size);
} break;
+ case 3: if (Options.UseMemmem) {
+ auto X = TPC.MMT.Get(Rand.Rand());
+ DE = DictionaryEntry(X);
+ } break;
default:
assert(0);
}
@@ -533,14 +530,4 @@ void MutationDispatcher::AddWordToManualDictionary(const Word &W) {
{W, std::numeric_limits<size_t>::max()});
}
-void MutationDispatcher::AddWordToAutoDictionary(DictionaryEntry DE) {
- static const size_t kMaxAutoDictSize = 1 << 14;
- if (TempAutoDictionary.size() >= kMaxAutoDictSize) return;
- TempAutoDictionary.push_back(DE);
-}
-
-void MutationDispatcher::ClearAutoDictionary() {
- TempAutoDictionary.clear();
-}
-
} // namespace fuzzer
diff --git a/lib/Fuzzer/FuzzerMutate.h b/lib/Fuzzer/FuzzerMutate.h
index 8c8fb3fd74c7..84b04c0dbf3e 100644
--- a/lib/Fuzzer/FuzzerMutate.h
+++ b/lib/Fuzzer/FuzzerMutate.h
@@ -52,10 +52,6 @@ public:
size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size,
size_t MaxSize);
- /// Mutates data by adding a word from the temporary automatic dictionary.
- size_t Mutate_AddWordFromTemporaryAutoDictionary(uint8_t *Data, size_t Size,
- size_t MaxSize);
-
/// Mutates data by adding a word from the TORC.
size_t Mutate_AddWordFromTORC(uint8_t *Data, size_t Size, size_t MaxSize);
@@ -84,8 +80,6 @@ public:
void AddWordToManualDictionary(const Word &W);
- void AddWordToAutoDictionary(DictionaryEntry DE);
- void ClearAutoDictionary();
void PrintRecommendedDictionary();
void SetCorpus(const InputCorpus *Corpus) { this->Corpus = Corpus; }
diff --git a/lib/Fuzzer/FuzzerTracePC.cpp b/lib/Fuzzer/FuzzerTracePC.cpp
index 6f5c7be41062..ced0a2133340 100644
--- a/lib/Fuzzer/FuzzerTracePC.cpp
+++ b/lib/Fuzzer/FuzzerTracePC.cpp
@@ -37,6 +37,8 @@ namespace fuzzer {
TracePC TPC;
+int ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr;
+
uint8_t *TracePC::Counters() const {
return __sancov_trace_pc_guard_8bit_counters;
}
@@ -293,6 +295,20 @@ void TracePC::HandleCmp(uintptr_t PC, T Arg1, T Arg2) {
ValueProfileMap.AddValue(Idx);
}
+static size_t InternalStrnlen(const char *S, size_t MaxLen) {
+ size_t Len = 0;
+ for (; Len < MaxLen && S[Len]; Len++) {}
+ return Len;
+}
+
+// Finds min of (strlen(S1), strlen(S2)).
+// Needed because one of these strings may actually not be zero-terminated.
+static size_t InternalStrnlen2(const char *S1, const char *S2) {
+ size_t Len = 0;
+ for (; S1[Len] && S2[Len]; Len++) {}
+ return Len;
+}
+
} // namespace fuzzer
extern "C" {
@@ -415,4 +431,71 @@ void __sanitizer_cov_trace_gep(uintptr_t Idx) {
uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
fuzzer::TPC.HandleCmp(PC, Idx, (uintptr_t)0);
}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1,
+ const void *s2, size_t n, int result) {
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+ if (result == 0) return; // No reason to mutate.
+ if (n <= 1) return; // Not interesting.
+ fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/false);
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1,
+ const char *s2, size_t n, int result) {
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+ if (result == 0) return; // No reason to mutate.
+ size_t Len1 = fuzzer::InternalStrnlen(s1, n);
+ size_t Len2 = fuzzer::InternalStrnlen(s2, n);
+ n = std::min(n, Len1);
+ n = std::min(n, Len2);
+ if (n <= 1) return; // Not interesting.
+ fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/true);
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1,
+ const char *s2, int result) {
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+ if (result == 0) return; // No reason to mutate.
+ size_t N = fuzzer::InternalStrnlen2(s1, s2);
+ if (N <= 1) return; // Not interesting.
+ fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, N, /*StopAtZero*/true);
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1,
+ const char *s2, size_t n, int result) {
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+ return __sanitizer_weak_hook_strncmp(called_pc, s1, s2, n, result);
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1,
+ const char *s2, int result) {
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+ return __sanitizer_weak_hook_strcmp(called_pc, s1, s2, result);
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1,
+ const char *s2, char *result) {
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+ fuzzer::TPC.MMT.Add(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1,
+ const char *s2, char *result) {
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+ fuzzer::TPC.MMT.Add(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1,
+ const void *s2, size_t len2, void *result) {
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+ fuzzer::TPC.MMT.Add(reinterpret_cast<const uint8_t *>(s2), len2);
+}
} // extern "C"
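
The strcmp/strncmp hooks above only look at the prefix known to be valid on both sides, because one operand may not be zero-terminated. The tiny standalone program below re-implements the two strnlen helpers under different names and checks their behaviour; it is an illustration, not part of the patch.

    #include <cassert>
    #include <cstddef>

    static size_t StrnlenSketch(const char *S, size_t MaxLen) {
      size_t Len = 0;
      for (; Len < MaxLen && S[Len]; Len++) {}
      return Len;
    }

    static size_t Strnlen2Sketch(const char *S1, const char *S2) {
      size_t Len = 0;
      for (; S1[Len] && S2[Len]; Len++) {}
      return Len;
    }

    int main() {
      assert(StrnlenSketch("abc", 10) == 3);     // stops at the terminator
      assert(StrnlenSketch("abcdef", 4) == 4);   // clamped by MaxLen
      assert(Strnlen2Sketch("abcd", "ab") == 2); // bounded by the shorter string
      return 0;
    }
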
diff --git a/lib/Fuzzer/FuzzerTracePC.h b/lib/Fuzzer/FuzzerTracePC.h
index 5ec8c590b4df..b36c4f54306c 100644
--- a/lib/Fuzzer/FuzzerTracePC.h
+++ b/lib/Fuzzer/FuzzerTracePC.h
@@ -45,6 +45,28 @@ struct TableOfRecentCompares {
Pair Table[kSize];
};
+template <size_t kSizeT>
+struct MemMemTable {
+ static const size_t kSize = kSizeT;
+ Word MemMemWords[kSize];
+ Word EmptyWord;
+
+ void Add(const uint8_t *Data, size_t Size) {
+ if (Size <= 2) return;
+ Size = std::min(Size, Word::GetMaxSize());
+ size_t Idx = SimpleFastHash(Data, Size) % kSize;
+ MemMemWords[Idx].Set(Data, Size);
+ }
+ const Word &Get(size_t Idx) {
+ for (size_t i = 0; i < kSize; i++) {
+ const Word &W = MemMemWords[(Idx + i) % kSize];
+ if (W.size()) return W;
+ }
+ EmptyWord.Set(nullptr, 0);
+ return EmptyWord;
+ }
+};
+
class TracePC {
public:
static const size_t kNumPCs = 1 << 21;
@@ -81,6 +103,7 @@ class TracePC {
TableOfRecentCompares<uint32_t, 32> TORC4;
TableOfRecentCompares<uint64_t, 32> TORC8;
TableOfRecentCompares<Word, 32> TORCW;
+ MemMemTable<1024> MMT;
void PrintNewPCs();
void InitializePrintNewPCs();
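
MemMemTable above is a fixed-size, hash-indexed table of recently seen needles from strstr/memmem-style hooks. The sketch below mirrors the idea with std::string instead of libFuzzer's Word (the type and names are made up): Add() hashes the data into one of kSize slots, and Get() probes forward from a caller-supplied index until it finds a non-empty entry.

    #include <cstddef>
    #include <cstdint>
    #include <string>

    template <size_t kSize> struct MemMemTableSketch {
      std::string Words[kSize];

      static size_t SimpleFastHashSketch(const uint8_t *Data, size_t Size) {
        size_t Res = 0;
        for (size_t I = 0; I < Size; I++)
          Res = Res * 11 + Data[I];
        return Res;
      }

      void Add(const uint8_t *Data, size_t Size) {
        if (Size <= 2) return; // too short to be an interesting needle
        Words[SimpleFastHashSketch(Data, Size) % kSize].assign(
            reinterpret_cast<const char *>(Data), Size);
      }

      const std::string &Get(size_t Idx) const {
        static const std::string Empty;
        for (size_t I = 0; I < kSize; I++) {
          const std::string &W = Words[(Idx + I) % kSize];
          if (!W.empty()) return W;
        }
        return Empty;
      }
    };
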
diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp
deleted file mode 100644
index 8670e2ad6727..000000000000
--- a/lib/Fuzzer/FuzzerTraceState.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-//===- FuzzerTraceState.cpp - Trace-based fuzzer mutator ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Data tracing.
-//===----------------------------------------------------------------------===//
-
-#include "FuzzerDictionary.h"
-#include "FuzzerIO.h"
-#include "FuzzerInternal.h"
-#include "FuzzerMutate.h"
-#include "FuzzerTracePC.h"
-#include <algorithm>
-#include <cstring>
-#include <map>
-#include <set>
-#include <thread>
-
-namespace fuzzer {
-
-// Declared as static globals for faster checks inside the hooks.
-static bool RecordingMemmem = false;
-
-int ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr;
-
-class TraceState {
-public:
- TraceState(MutationDispatcher &MD, const FuzzingOptions &Options,
- const Fuzzer *F)
- : MD(MD), Options(Options), F(F) {}
-
- void StartTraceRecording() {
- if (!Options.UseMemmem)
- return;
- RecordingMemmem = true;
- InterestingWords.clear();
- MD.ClearAutoDictionary();
- }
-
- void StopTraceRecording() {
- if (!RecordingMemmem)
- return;
- for (auto &W : InterestingWords)
- MD.AddWordToAutoDictionary({W});
- }
-
- void AddInterestingWord(const uint8_t *Data, size_t Size) {
- if (!RecordingMemmem || !F->InFuzzingThread()) return;
- if (Size <= 1) return;
- Size = std::min(Size, Word::GetMaxSize());
- Word W(Data, Size);
- InterestingWords.insert(W);
- }
-
- private:
-
- // TODO: std::set is too inefficient, need to have a custom DS here.
- std::set<Word> InterestingWords;
- MutationDispatcher &MD;
- const FuzzingOptions Options;
- const Fuzzer *F;
-};
-
-static TraceState *TS;
-
-void Fuzzer::StartTraceRecording() {
- if (!TS) return;
- TS->StartTraceRecording();
-}
-
-void Fuzzer::StopTraceRecording() {
- if (!TS) return;
- TS->StopTraceRecording();
-}
-
-void Fuzzer::InitializeTraceState() {
- if (!Options.UseMemmem) return;
- TS = new TraceState(MD, Options, this);
-}
-
-static size_t InternalStrnlen(const char *S, size_t MaxLen) {
- size_t Len = 0;
- for (; Len < MaxLen && S[Len]; Len++) {}
- return Len;
-}
-
-// Finds min of (strlen(S1), strlen(S2)).
-// Needed bacause one of these strings may actually be non-zero terminated.
-static size_t InternalStrnlen2(const char *S1, const char *S2) {
- size_t Len = 0;
- for (; S1[Len] && S2[Len]; Len++) {}
- return Len;
-}
-
-} // namespace fuzzer
-
-using fuzzer::TS;
-
-extern "C" {
-
-// We may need to avoid defining weak hooks to stay compatible with older clang.
-#ifndef LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
-# define LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS 1
-#endif
-
-#if LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
-
-ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
-void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1,
- const void *s2, size_t n, int result) {
- if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
- if (result == 0) return; // No reason to mutate.
- if (n <= 1) return; // Not interesting.
- fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/false);
-}
-
-ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
-void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1,
- const char *s2, size_t n, int result) {
- if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
- if (result == 0) return; // No reason to mutate.
- size_t Len1 = fuzzer::InternalStrnlen(s1, n);
- size_t Len2 = fuzzer::InternalStrnlen(s2, n);
- n = std::min(n, Len1);
- n = std::min(n, Len2);
- if (n <= 1) return; // Not interesting.
- fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/true);
-}
-
-
-ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
-void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1,
- const char *s2, int result) {
- if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
- if (result == 0) return; // No reason to mutate.
- size_t N = fuzzer::InternalStrnlen2(s1, s2);
- if (N <= 1) return; // Not interesting.
- fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, N, /*StopAtZero*/true);
-}
-
-ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
-void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1,
- const char *s2, size_t n, int result) {
- if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
- return __sanitizer_weak_hook_strncmp(called_pc, s1, s2, n, result);
-}
-
-ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
-void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1,
- const char *s2, int result) {
- if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
- return __sanitizer_weak_hook_strcmp(called_pc, s1, s2, result);
-}
-
-ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
-void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1,
- const char *s2, char *result) {
- if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
- TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
-}
-
-ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
-void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1,
- const char *s2, char *result) {
- if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
- TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
-}
-
-ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
-void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1,
- const void *s2, size_t len2, void *result) {
- if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
- TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), len2);
-}
-
-#endif // LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
-} // extern "C"
diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp
index f5fd3a85187c..2d95f40e46a1 100644
--- a/lib/Fuzzer/FuzzerUtil.cpp
+++ b/lib/Fuzzer/FuzzerUtil.cpp
@@ -215,4 +215,11 @@ bool ExecuteCommandAndReadOutput(const std::string &Command, std::string *Out) {
return true;
}
+size_t SimpleFastHash(const uint8_t *Data, size_t Size) {
+ size_t Res = 0;
+ for (size_t i = 0; i < Size; i++)
+ Res = Res * 11 + Data[i];
+ return Res;
+}
+
} // namespace fuzzer
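
SimpleFastHash is a plain polynomial rolling hash with multiplier 11. For example, over the three bytes of "abc" (97, 98, 99) it evaluates to ((0 * 11 + 97) * 11 + 98) * 11 + 99 = 12914; the value is exact as long as the running sum stays within size_t, which it trivially does here.
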
diff --git a/lib/Fuzzer/FuzzerUtil.h b/lib/Fuzzer/FuzzerUtil.h
index f84fd9ef0fce..62d6e61dcf17 100644
--- a/lib/Fuzzer/FuzzerUtil.h
+++ b/lib/Fuzzer/FuzzerUtil.h
@@ -67,10 +67,20 @@ inline std::string CloneArgsWithoutX(const std::vector<std::string> &Args,
return CloneArgsWithoutX(Args, X, X);
}
+inline std::pair<std::string, std::string> SplitBefore(std::string X,
+ std::string S) {
+ auto Pos = S.find(X);
+ if (Pos == std::string::npos)
+ return std::make_pair(S, "");
+ return std::make_pair(S.substr(0, Pos), S.substr(Pos));
+}
+
std::string DisassembleCmd(const std::string &FileName);
std::string SearchRegexCmd(const std::string &Regex);
+size_t SimpleFastHash(const uint8_t *Data, size_t Size);
+
} // namespace fuzzer
#endif // LLVM_FUZZER_UTIL_H
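
SplitBefore() cuts a command string at the first occurrence of X and keeps X in the second half, which is how the driver and the merge logic re-append the pass-through arguments after their own flags. A small usage sketch follows, with a standalone copy of the helper and a hypothetical command line and control-file name:

    #include <cassert>
    #include <string>
    #include <utility>

    static std::pair<std::string, std::string> SplitBeforeSketch(std::string X,
                                                                 std::string S) {
      auto Pos = S.find(X);
      if (Pos == std::string::npos)
        return std::make_pair(S, "");
      return std::make_pair(S.substr(0, Pos), S.substr(Pos));
    }

    int main() {
      auto Cmd = SplitBeforeSketch(
          "-ignore_remaining_args=1",
          "./fuzzer -runs=10 -ignore_remaining_args=1 --foo");
      assert(Cmd.first == "./fuzzer -runs=10 ");
      assert(Cmd.second == "-ignore_remaining_args=1 --foo");
      // Extra flags go before the tail; the tail stays last on the command line.
      std::string Rebuilt = Cmd.first + "-merge_control_file=MCF " + Cmd.second;
      assert(Rebuilt ==
             "./fuzzer -runs=10 -merge_control_file=MCF -ignore_remaining_args=1 --foo");
      return 0;
    }
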
diff --git a/lib/Fuzzer/afl/afl_driver.cpp b/lib/Fuzzer/afl/afl_driver.cpp
index d0521bdfdd67..15bceb896e17 100644
--- a/lib/Fuzzer/afl/afl_driver.cpp
+++ b/lib/Fuzzer/afl/afl_driver.cpp
@@ -22,8 +22,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
return 0;
}
EOF
-# Build your target with -fsanitize-coverage=trace-pc using fresh clang.
-clang -g -fsanitize-coverage=trace-pc test_fuzzer.cc -c
+# Build your target with -fsanitize-coverage=trace-pc-guard using fresh clang.
+clang -g -fsanitize-coverage=trace-pc-guard test_fuzzer.cc -c
# Build afl-llvm-rt.o.c from the AFL distribution.
clang -c -w $AFL_HOME/llvm_mode/afl-llvm-rt.o.c
# Build this file, link it with afl-llvm-rt.o.o and the target code.
diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt
index 30566bdc87ae..43aea2b7a186 100644
--- a/lib/Fuzzer/test/CMakeLists.txt
+++ b/lib/Fuzzer/test/CMakeLists.txt
@@ -90,6 +90,7 @@ set(Tests
EmptyTest
EquivalenceATest
EquivalenceBTest
+ FlagsTest
FourIndependentBranchesTest
FullCoverageSetTest
InitializeTest
@@ -272,5 +273,5 @@ add_lit_testsuite(check-fuzzer "Running Fuzzer tests"
# Don't add dependencies on Windows. The linker step would fail on Windows,
# since cmake will use link.exe for linking and won't include compiler-rt libs.
if(NOT MSVC)
- add_dependencies(check-fuzzer FileCheck sancov not llvm-symbolizer)
+ add_dependencies(check-fuzzer FileCheck sancov not)
endif()
diff --git a/lib/Fuzzer/test/FlagsTest.cpp b/lib/Fuzzer/test/FlagsTest.cpp
new file mode 100644
index 000000000000..ac64b9d48df5
--- /dev/null
+++ b/lib/Fuzzer/test/FlagsTest.cpp
@@ -0,0 +1,32 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Parse some flags
+#include <string>
+#include <vector>
+
+static std::vector<std::string> Flags;
+
+extern "C" int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
+ // Parse --flags; anything after -ignore_remaining_args=1 is passed through.
+ int I = 1;
+ while (I < *Argc) {
+ std::string S((*Argv)[I++]);
+ if (S == "-ignore_remaining_args=1")
+ break;
+ if (S.substr(0, 2) == "--")
+ Flags.push_back(S);
+ }
+ while (I < *Argc)
+ Flags.push_back(std::string((*Argv)[I++]));
+
+ return 0;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ fprintf(stderr, "BINGO ");
+ for (auto Flag : Flags)
+ fprintf(stderr, "%s ", Flag.c_str());
+ fprintf(stderr, "\n");
+ exit(0);
+}
diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp
index 1053c28527bf..eba2663029b2 100644
--- a/lib/Fuzzer/test/FuzzerUnittest.cpp
+++ b/lib/Fuzzer/test/FuzzerUnittest.cpp
@@ -424,35 +424,6 @@ TEST(FuzzerMutate, AddWordFromDictionary2) {
TestAddWordFromDictionary(&MutationDispatcher::Mutate, 1 << 15);
}
-void TestAddWordFromDictionaryWithHint(Mutator M, int NumIter) {
- std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
- fuzzer::EF = t.get();
- Random Rand(0);
- MutationDispatcher MD(Rand, {});
- uint8_t W[] = {0xAA, 0xBB, 0xCC, 0xDD, 0xFF, 0xEE, 0xEF};
- size_t PosHint = 7777;
- MD.AddWordToAutoDictionary({Word(W, sizeof(W)), PosHint});
- int FoundMask = 0;
- for (int i = 0; i < NumIter; i++) {
- uint8_t T[10000];
- memset(T, 0, sizeof(T));
- size_t NewSize = (MD.*M)(T, 9000, 10000);
- if (NewSize >= PosHint + sizeof(W) &&
- !memcmp(W, T + PosHint, sizeof(W)))
- FoundMask = 1;
- }
- EXPECT_EQ(FoundMask, 1);
-}
-
-TEST(FuzzerMutate, AddWordFromDictionaryWithHint1) {
- TestAddWordFromDictionaryWithHint(
- &MutationDispatcher::Mutate_AddWordFromTemporaryAutoDictionary, 1 << 5);
-}
-
-TEST(FuzzerMutate, AddWordFromDictionaryWithHint2) {
- TestAddWordFromDictionaryWithHint(&MutationDispatcher::Mutate, 1 << 10);
-}
-
void TestChangeASCIIInteger(Mutator M, int NumIter) {
std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
fuzzer::EF = t.get();
@@ -593,7 +564,7 @@ TEST(Corpus, Distribution) {
size_t N = 10;
size_t TriesPerUnit = 1<<16;
for (size_t i = 0; i < N; i++)
- C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 0, false, {});
+ C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 1, false, {});
std::vector<size_t> Hist(N);
for (size_t i = 0; i < N * TriesPerUnit; i++) {
diff --git a/lib/Fuzzer/test/fuzzer-flags.test b/lib/Fuzzer/test/fuzzer-flags.test
index 76ea27705750..976da2906d7c 100644
--- a/lib/Fuzzer/test/fuzzer-flags.test
+++ b/lib/Fuzzer/test/fuzzer-flags.test
@@ -1,10 +1,21 @@
-RUN: LLVMFuzzer-SimpleTest -foo_bar=1 2>&1 | FileCheck %s --check-prefix=FOO_BAR
+# Does not work on Windows for an unknown reason.
+UNSUPPORTED: windows
+
+RUN: LLVMFuzzer-FlagsTest -foo_bar=1 2>&1 | FileCheck %s --check-prefix=FOO_BAR
FOO_BAR: WARNING: unrecognized flag '-foo_bar=1'; use -help=1 to list all flags
FOO_BAR: BINGO
-RUN: LLVMFuzzer-SimpleTest -runs=10 --max_len=100 2>&1 | FileCheck %s --check-prefix=DASH_DASH
+RUN: LLVMFuzzer-FlagsTest -runs=10 --max_len=100 2>&1 | FileCheck %s --check-prefix=DASH_DASH
DASH_DASH: WARNING: did you mean '-max_len=100' (single dash)?
DASH_DASH: INFO: A corpus is not provided, starting from an empty corpus
-RUN: LLVMFuzzer-SimpleTest -help=1 2>&1 | FileCheck %s --check-prefix=NO_INTERNAL
+RUN: LLVMFuzzer-FlagsTest -help=1 2>&1 | FileCheck %s --check-prefix=NO_INTERNAL
NO_INTERNAL-NOT: internal flag
+
+RUN: LLVMFuzzer-FlagsTest --foo-bar -runs=10 -ignore_remaining_args=1 --baz -help=1 test 2>&1 | FileCheck %s --check-prefix=PASSTHRU
+PASSTHRU: BINGO --foo-bar --baz -help=1 test
+
+RUN: mkdir -p %t/T0 %t/T1
+RUN: touch %t/T1/empty
+RUN: LLVMFuzzer-FlagsTest --foo-bar -merge=1 %t/T0 %t/T1 -ignore_remaining_args=1 --baz -help=1 test 2>&1 | FileCheck %s --check-prefix=PASSTHRU-MERGE
+PASSTHRU-MERGE: BINGO --foo-bar --baz -help=1 test
diff --git a/lib/Fuzzer/test/fuzzer-traces-hooks.test b/lib/Fuzzer/test/fuzzer-traces-hooks.test
index f93a8b7199e2..77ca4b47bd01 100644
--- a/lib/Fuzzer/test/fuzzer-traces-hooks.test
+++ b/lib/Fuzzer/test/fuzzer-traces-hooks.test
@@ -10,7 +10,7 @@ RUN: not LLVMFuzzer-StrstrTest -seed=1 -runs=2000000 2>&1 | File
RUN: not LLVMFuzzer-Memcmp64BytesTest -seed=1 -runs=1000000 2>&1 | FileCheck %s
-RUN: LLVMFuzzer-RepeatedMemcmp -seed=11 -runs=100000 2>&1 | FileCheck %s --check-prefix=RECOMMENDED_DICT
+RUN: LLVMFuzzer-RepeatedMemcmp -seed=11 -runs=100000 -max_len=20 2>&1 | FileCheck %s --check-prefix=RECOMMENDED_DICT
RECOMMENDED_DICT:###### Recommended dictionary. ######
RECOMMENDED_DICT-DAG: "foo"
RECOMMENDED_DICT-DAG: "bar"
diff --git a/lib/Fuzzer/test/reduce_inputs.test b/lib/Fuzzer/test/reduce_inputs.test
index a4a5c57123d3..5ce4440788f4 100644
--- a/lib/Fuzzer/test/reduce_inputs.test
+++ b/lib/Fuzzer/test/reduce_inputs.test
@@ -9,5 +9,6 @@ CHECK: INFO: found item with checksum '0eb8e4ed029b774d80f2b66408203801cb982a60'
RUN: LLVMFuzzer-ShrinkControlFlowSimpleTest -runs=0 %t/C 2>&1 | FileCheck %s --check-prefix=COUNT
COUNT: READ units: 3
-
+# A bit longer test.
+RUN: LLVMFuzzer-ShrinkControlFlowTest -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -seed=1 -reduce_inputs=1 -runs=1000000 2>&1 | FileCheck %s
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index 80371780fb6d..170bc544d53f 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -365,7 +365,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break;
case CallingConv::PTX_Device: Out << "ptx_device"; break;
case CallingConv::X86_64_SysV: Out << "x86_64_sysvcc"; break;
- case CallingConv::X86_64_Win64: Out << "x86_64_win64cc"; break;
+ case CallingConv::Win64: Out << "win64cc"; break;
case CallingConv::SPIR_FUNC: Out << "spir_func"; break;
case CallingConv::SPIR_KERNEL: Out << "spir_kernel"; break;
case CallingConv::Swift: Out << "swiftcc"; break;
@@ -1964,6 +1964,7 @@ static void writeDIImportedEntity(raw_ostream &Out, const DIImportedEntity *N,
Printer.printString("name", N->getName());
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printMetadata("entity", N->getRawEntity());
+ Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
Out << ")";
}
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index e31779c83e3a..f56fe7089807 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -44,8 +44,8 @@ bool Constant::isNegativeZeroValue() const {
// Equivalent for a vector of -0.0's.
if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
- if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue()))
- if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative())
+ if (CV->getElementType()->isFloatingPointTy() && CV->isSplat())
+ if (CV->getElementAsAPFloat(0).isNegZero())
return true;
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
@@ -70,8 +70,8 @@ bool Constant::isZeroValue() const {
// Equivalent for a vector of -0.0's.
if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
- if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue()))
- if (SplatCFP && SplatCFP->isZero())
+ if (CV->getElementType()->isFloatingPointTy() && CV->isSplat())
+ if (CV->getElementAsAPFloat(0).isZero())
return true;
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
@@ -113,9 +113,13 @@ bool Constant::isAllOnesValue() const {
return Splat->isAllOnesValue();
// Check for constant vectors which are splats of -1 values.
- if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
- if (Constant *Splat = CV->getSplatValue())
- return Splat->isAllOnesValue();
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this)) {
+ if (CV->isSplat()) {
+ if (CV->getElementType()->isFloatingPointTy())
+ return CV->getElementAsAPFloat(0).bitcastToAPInt().isAllOnesValue();
+ return CV->getElementAsAPInt(0).isAllOnesValue();
+ }
+ }
return false;
}
@@ -135,9 +139,13 @@ bool Constant::isOneValue() const {
return Splat->isOneValue();
// Check for constant vectors which are splats of 1 values.
- if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
- if (Constant *Splat = CV->getSplatValue())
- return Splat->isOneValue();
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this)) {
+ if (CV->isSplat()) {
+ if (CV->getElementType()->isFloatingPointTy())
+ return CV->getElementAsAPFloat(0).bitcastToAPInt().isOneValue();
+ return CV->getElementAsAPInt(0).isOneValue();
+ }
+ }
return false;
}
@@ -157,9 +165,13 @@ bool Constant::isMinSignedValue() const {
return Splat->isMinSignedValue();
// Check for constant vectors which are splats of INT_MIN values.
- if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
- if (Constant *Splat = CV->getSplatValue())
- return Splat->isMinSignedValue();
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this)) {
+ if (CV->isSplat()) {
+ if (CV->getElementType()->isFloatingPointTy())
+ return CV->getElementAsAPFloat(0).bitcastToAPInt().isMinSignedValue();
+ return CV->getElementAsAPInt(0).isMinSignedValue();
+ }
+ }
return false;
}
@@ -179,9 +191,13 @@ bool Constant::isNotMinSignedValue() const {
return Splat->isNotMinSignedValue();
// Check for constant vectors which are splats of INT_MIN values.
- if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
- if (Constant *Splat = CV->getSplatValue())
- return Splat->isNotMinSignedValue();
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this)) {
+ if (CV->isSplat()) {
+ if (CV->getElementType()->isFloatingPointTy())
+ return !CV->getElementAsAPFloat(0).bitcastToAPInt().isMinSignedValue();
+ return !CV->getElementAsAPInt(0).isMinSignedValue();
+ }
+ }
// It *may* contain INT_MIN, we can't tell.
return false;
@@ -2565,6 +2581,34 @@ uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const {
}
}
+APInt ConstantDataSequential::getElementAsAPInt(unsigned Elt) const {
+ assert(isa<IntegerType>(getElementType()) &&
+ "Accessor can only be used when element is an integer");
+ const char *EltPtr = getElementPointer(Elt);
+
+ // The data is stored in host byte order, make sure to cast back to the right
+ // type to load with the right endianness.
+ switch (getElementType()->getIntegerBitWidth()) {
+ default: llvm_unreachable("Invalid bitwidth for CDS");
+ case 8: {
+ auto EltVal = *reinterpret_cast<const uint8_t *>(EltPtr);
+ return APInt(8, EltVal);
+ }
+ case 16: {
+ auto EltVal = *reinterpret_cast<const uint16_t *>(EltPtr);
+ return APInt(16, EltVal);
+ }
+ case 32: {
+ auto EltVal = *reinterpret_cast<const uint32_t *>(EltPtr);
+ return APInt(32, EltVal);
+ }
+ case 64: {
+ auto EltVal = *reinterpret_cast<const uint64_t *>(EltPtr);
+ return APInt(64, EltVal);
+ }
+ }
+}
+
APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const {
const char *EltPtr = getElementPointer(Elt);
@@ -2623,17 +2667,21 @@ bool ConstantDataSequential::isCString() const {
return Str.drop_back().find(0) == StringRef::npos;
}
-Constant *ConstantDataVector::getSplatValue() const {
+bool ConstantDataVector::isSplat() const {
const char *Base = getRawDataValues().data();
// Compare elements 1+ to the 0'th element.
unsigned EltSize = getElementByteSize();
for (unsigned i = 1, e = getNumElements(); i != e; ++i)
if (memcmp(Base, Base+i*EltSize, EltSize))
- return nullptr;
+ return false;
+
+ return true;
+}
+Constant *ConstantDataVector::getSplatValue() const {
// If they're all the same, return the 0th one as a representative.
- return getElementAsConstant(0);
+ return isSplat() ? getElementAsConstant(0) : nullptr;
}
//===----------------------------------------------------------------------===//
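
The Constants.cpp changes above split the old getSplatValue() logic into a raw byte-wise isSplat() check plus typed accessors (getElementAsAPInt / getElementAsAPFloat), so splat queries no longer have to materialize a Constant. A minimal sketch of the byte comparison, written against plain pointers rather than the ConstantDataVector API:

    #include <cstddef>
    #include <cstring>

    static bool IsSplatBytes(const unsigned char *Base, size_t NumElements,
                             size_t EltSize) {
      // Compare every element's bytes against element 0.
      for (size_t I = 1; I < NumElements; ++I)
        if (std::memcmp(Base, Base + I * EltSize, EltSize) != 0)
          return false;
      return true;
    }
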
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 2165ae5a9470..aba770457e2f 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -14,7 +14,6 @@
#include "llvm-c/Core.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 7e598b43ac16..bce28ba3b950 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -148,10 +148,13 @@ DICompileUnit *DIBuilder::createCompileUnit(
static DIImportedEntity *
createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context,
- Metadata *NS, unsigned Line, StringRef Name,
+ Metadata *NS, DIFile *File, unsigned Line, StringRef Name,
SmallVectorImpl<TrackingMDNodeRef> &AllImportedModules) {
+ if (Line)
+ assert(File && "Source location has line number but no file");
unsigned EntitiesCount = C.pImpl->DIImportedEntitys.size();
- auto *M = DIImportedEntity::get(C, Tag, Context, DINodeRef(NS), Line, Name);
+ auto *M =
+ DIImportedEntity::get(C, Tag, Context, DINodeRef(NS), File, Line, Name);
if (EntitiesCount < C.pImpl->DIImportedEntitys.size())
// A new Imported Entity was just added to the context.
// Add it to the Imported Modules list.
@@ -160,33 +163,38 @@ createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context,
}
DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context,
- DINamespace *NS,
+ DINamespace *NS, DIFile *File,
unsigned Line) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
- Context, NS, Line, StringRef(), AllImportedModules);
+ Context, NS, File, Line, StringRef(),
+ AllImportedModules);
}
DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context,
DIImportedEntity *NS,
- unsigned Line) {
+ DIFile *File, unsigned Line) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
- Context, NS, Line, StringRef(), AllImportedModules);
+ Context, NS, File, Line, StringRef(),
+ AllImportedModules);
}
DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context, DIModule *M,
- unsigned Line) {
+ DIFile *File, unsigned Line) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
- Context, M, Line, StringRef(), AllImportedModules);
+ Context, M, File, Line, StringRef(),
+ AllImportedModules);
}
DIImportedEntity *DIBuilder::createImportedDeclaration(DIScope *Context,
DINode *Decl,
+ DIFile *File,
unsigned Line,
StringRef Name) {
// Make sure to use the unique identifier based metadata reference for
// types that have one.
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration,
- Context, Decl, Line, Name, AllImportedModules);
+ Context, Decl, File, Line, Name,
+ AllImportedModules);
}
DIFile *DIBuilder::createFile(StringRef Filename, StringRef Directory,
diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp
index 0bf68b4c53bb..c14940bad45d 100644
--- a/lib/IR/DebugInfoMetadata.cpp
+++ b/lib/IR/DebugInfoMetadata.cpp
@@ -760,12 +760,13 @@ DIObjCProperty *DIObjCProperty::getImpl(
DIImportedEntity *DIImportedEntity::getImpl(LLVMContext &Context, unsigned Tag,
Metadata *Scope, Metadata *Entity,
- unsigned Line, MDString *Name,
- StorageType Storage,
+ Metadata *File, unsigned Line,
+ MDString *Name, StorageType Storage,
bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(DIImportedEntity, (Tag, Scope, Entity, Line, Name));
- Metadata *Ops[] = {Scope, Entity, Name};
+ DEFINE_GETIMPL_LOOKUP(DIImportedEntity,
+ (Tag, Scope, Entity, File, Line, Name));
+ Metadata *Ops[] = {Scope, Entity, Name, File};
DEFINE_GETIMPL_STORE(DIImportedEntity, (Tag, Line), Ops);
}
diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp
index 9bd0e297f4ef..4d7e3040ecd7 100644
--- a/lib/IR/Dominators.cpp
+++ b/lib/IR/Dominators.cpp
@@ -61,24 +61,30 @@ bool BasicBlockEdge::isSingleEdge() const {
//===----------------------------------------------------------------------===//
template class llvm::DomTreeNodeBase<BasicBlock>;
-template class llvm::DominatorTreeBase<BasicBlock>;
-
-template void llvm::DomTreeBuilder::Calculate<Function, BasicBlock *>(
- DominatorTreeBase<
- typename std::remove_pointer<GraphTraits<BasicBlock *>::NodeRef>::type>
- &DT,
- Function &F);
-template void llvm::DomTreeBuilder::Calculate<Function, Inverse<BasicBlock *>>(
- DominatorTreeBase<typename std::remove_pointer<
- GraphTraits<Inverse<BasicBlock *>>::NodeRef>::type> &DT,
- Function &F);
-template bool llvm::DomTreeBuilder::Verify<BasicBlock *>(
- const DominatorTreeBase<
- typename std::remove_pointer<GraphTraits<BasicBlock *>::NodeRef>::type>
- &DT);
-template bool llvm::DomTreeBuilder::Verify<Inverse<BasicBlock *>>(
- const DominatorTreeBase<typename std::remove_pointer<
- GraphTraits<Inverse<BasicBlock *>>::NodeRef>::type> &DT);
+template class llvm::DominatorTreeBase<BasicBlock, false>; // DomTreeBase
+template class llvm::DominatorTreeBase<BasicBlock, true>; // PostDomTreeBase
+
+template void
+llvm::DomTreeBuilder::Calculate<DomTreeBuilder::BBDomTree, Function>(
+ DomTreeBuilder::BBDomTree &DT, Function &F);
+template void
+llvm::DomTreeBuilder::Calculate<DomTreeBuilder::BBPostDomTree, Function>(
+ DomTreeBuilder::BBPostDomTree &DT, Function &F);
+
+template void llvm::DomTreeBuilder::InsertEdge<DomTreeBuilder::BBDomTree>(
+ DomTreeBuilder::BBDomTree &DT, BasicBlock *From, BasicBlock *To);
+template void llvm::DomTreeBuilder::InsertEdge<DomTreeBuilder::BBPostDomTree>(
+ DomTreeBuilder::BBPostDomTree &DT, BasicBlock *From, BasicBlock *To);
+
+template void llvm::DomTreeBuilder::DeleteEdge<DomTreeBuilder::BBDomTree>(
+ DomTreeBuilder::BBDomTree &DT, BasicBlock *From, BasicBlock *To);
+template void llvm::DomTreeBuilder::DeleteEdge<DomTreeBuilder::BBPostDomTree>(
+ DomTreeBuilder::BBPostDomTree &DT, BasicBlock *From, BasicBlock *To);
+
+template bool llvm::DomTreeBuilder::Verify<DomTreeBuilder::BBDomTree>(
+ const DomTreeBuilder::BBDomTree &DT);
+template bool llvm::DomTreeBuilder::Verify<DomTreeBuilder::BBPostDomTree>(
+ const DomTreeBuilder::BBPostDomTree &DT);
bool DominatorTree::invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &) {
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index e413a4f34432..bea2c7ae8ff2 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -990,24 +990,26 @@ template <> struct MDNodeKeyImpl<DIImportedEntity> {
unsigned Tag;
Metadata *Scope;
Metadata *Entity;
+ Metadata *File;
unsigned Line;
MDString *Name;
- MDNodeKeyImpl(unsigned Tag, Metadata *Scope, Metadata *Entity, unsigned Line,
- MDString *Name)
- : Tag(Tag), Scope(Scope), Entity(Entity), Line(Line), Name(Name) {}
+ MDNodeKeyImpl(unsigned Tag, Metadata *Scope, Metadata *Entity, Metadata *File,
+ unsigned Line, MDString *Name)
+ : Tag(Tag), Scope(Scope), Entity(Entity), File(File), Line(Line),
+ Name(Name) {}
MDNodeKeyImpl(const DIImportedEntity *N)
: Tag(N->getTag()), Scope(N->getRawScope()), Entity(N->getRawEntity()),
- Line(N->getLine()), Name(N->getRawName()) {}
+ File(N->getRawFile()), Line(N->getLine()), Name(N->getRawName()) {}
bool isKeyOf(const DIImportedEntity *RHS) const {
return Tag == RHS->getTag() && Scope == RHS->getRawScope() &&
- Entity == RHS->getRawEntity() && Line == RHS->getLine() &&
- Name == RHS->getRawName();
+ Entity == RHS->getRawEntity() && File == RHS->getFile() &&
+ Line == RHS->getLine() && Name == RHS->getRawName();
}
unsigned getHashValue() const {
- return hash_combine(Tag, Scope, Entity, Line, Name);
+ return hash_combine(Tag, Scope, Entity, File, Line, Name);
}
};
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index 29e2f42d3e05..995e1e570340 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -625,21 +625,21 @@ void PMTopLevelManager::schedulePass(Pass *P) {
checkAnalysis = false;
const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
- for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(),
- E = RequiredSet.end(); I != E; ++I) {
+ for (const AnalysisID ID : RequiredSet) {
- Pass *AnalysisPass = findAnalysisPass(*I);
+ Pass *AnalysisPass = findAnalysisPass(ID);
if (!AnalysisPass) {
- const PassInfo *PI = findAnalysisPassInfo(*I);
+ const PassInfo *PI = findAnalysisPassInfo(ID);
if (!PI) {
// Pass P is not in the global PassRegistry
dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n";
dbgs() << "Verify if there is a pass dependency cycle." << "\n";
dbgs() << "Required Passes:" << "\n";
- for (AnalysisUsage::VectorType::const_iterator I2 = RequiredSet.begin(),
- E = RequiredSet.end(); I2 != E && I2 != I; ++I2) {
- Pass *AnalysisPass2 = findAnalysisPass(*I2);
+ for (const AnalysisID ID2 : RequiredSet) {
+ if (ID == ID2)
+ break;
+ Pass *AnalysisPass2 = findAnalysisPass(ID2);
if (AnalysisPass2) {
dbgs() << "\t" << AnalysisPass2->getPassName() << "\n";
} else {
@@ -1070,17 +1070,15 @@ void PMDataManager::collectRequiredAndUsedAnalyses(
void PMDataManager::initializeAnalysisImpl(Pass *P) {
AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
- for (AnalysisUsage::VectorType::const_iterator
- I = AnUsage->getRequiredSet().begin(),
- E = AnUsage->getRequiredSet().end(); I != E; ++I) {
- Pass *Impl = findAnalysisPass(*I, true);
+ for (const AnalysisID ID : AnUsage->getRequiredSet()) {
+ Pass *Impl = findAnalysisPass(ID, true);
if (!Impl)
// This may be analysis pass that is initialized on the fly.
// If that is not the case then it will raise an assert when it is used.
continue;
AnalysisResolver *AR = P->getResolver();
assert(AR && "Analysis Resolver is not set");
- AR->addAnalysisImplsPair(*I, Impl);
+ AR->addAnalysisImplsPair(ID, Impl);
}
}
@@ -1112,21 +1110,19 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
TPM->collectLastUses(LUses, P);
- for (SmallVectorImpl<Pass *>::iterator I = LUses.begin(),
- E = LUses.end(); I != E; ++I) {
+ for (Pass *P : LUses) {
dbgs() << "--" << std::string(Offset*2, ' ');
- (*I)->dumpPassStructure(0);
+ P->dumpPassStructure(0);
}
}
void PMDataManager::dumpPassArguments() const {
- for (SmallVectorImpl<Pass *>::const_iterator I = PassVector.begin(),
- E = PassVector.end(); I != E; ++I) {
- if (PMDataManager *PMD = (*I)->getAsPMDataManager())
+ for (Pass *P : PassVector) {
+ if (PMDataManager *PMD = P->getAsPMDataManager())
PMD->dumpPassArguments();
else
if (const PassInfo *PI =
- TPM->findAnalysisPassInfo((*I)->getPassID()))
+ TPM->findAnalysisPassInfo(P->getPassID()))
if (!PI->isAnalysisGroup())
dbgs() << " -" << PI->getPassArgument();
}
@@ -1255,9 +1251,8 @@ Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
// Destructor
PMDataManager::~PMDataManager() {
- for (SmallVectorImpl<Pass *>::iterator I = PassVector.begin(),
- E = PassVector.end(); I != E; ++I)
- delete *I;
+ for (Pass *P : PassVector)
+ delete P;
}
//===----------------------------------------------------------------------===//
@@ -1284,35 +1279,35 @@ bool BBPassManager::runOnFunction(Function &F) {
bool Changed = doInitialization(F);
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ for (BasicBlock &BB : F)
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
BasicBlockPass *BP = getContainedPass(Index);
bool LocalChanged = false;
- dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName());
+ dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, BB.getName());
dumpRequiredSet(BP);
initializeAnalysisImpl(BP);
{
// If the pass crashes, remember this.
- PassManagerPrettyStackEntry X(BP, *I);
+ PassManagerPrettyStackEntry X(BP, BB);
TimeRegion PassTimer(getPassTimer(BP));
- LocalChanged |= BP->runOnBasicBlock(*I);
+ LocalChanged |= BP->runOnBasicBlock(BB);
}
Changed |= LocalChanged;
if (LocalChanged)
dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
- I->getName());
+ BB.getName());
dumpPreservedSet(BP);
dumpUsedSet(BP);
verifyPreservedAnalysis(BP);
removeNotPreservedAnalysis(BP);
recordAvailableAnalysis(BP);
- removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG);
+ removeDeadPasses(BP, BB.getName(), ON_BASICBLOCK_MSG);
}
return doFinalization(F) || Changed;
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index fdc7de6eaa34..c230a50044c7 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -103,7 +103,7 @@ std::unique_ptr<RandomNumberGenerator> Module::createRNG(const Pass* P) const {
// store salt metadata from the Module constructor.
Salt += sys::path::filename(getModuleIdentifier());
- return std::unique_ptr<RandomNumberGenerator>{new RandomNumberGenerator(Salt)};
+ return std::unique_ptr<RandomNumberGenerator>(new RandomNumberGenerator(Salt));
}
/// getNamedValue - Return the first global value in the module with
diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp
index 4034f9039dda..b052c76d1fed 100644
--- a/lib/Object/ArchiveWriter.cpp
+++ b/lib/Object/ArchiveWriter.cpp
@@ -318,7 +318,8 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind,
continue;
if (!(Symflags & object::SymbolRef::SF_Global))
continue;
- if (Symflags & object::SymbolRef::SF_Undefined)
+ if (Symflags & object::SymbolRef::SF_Undefined &&
+ !(Symflags & object::SymbolRef::SF_Indirect))
continue;
unsigned NameOffset = NameOS.tell();
diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp
index 740bf94d40e0..d1f46fdfa292 100644
--- a/lib/Object/COFFImportFile.cpp
+++ b/lib/Object/COFFImportFile.cpp
@@ -131,14 +131,14 @@ class ObjectFactory {
using u32 = support::ulittle32_t;
MachineTypes Machine;
BumpPtrAllocator Alloc;
- StringRef DLLName;
+ StringRef ImportName;
StringRef Library;
std::string ImportDescriptorSymbolName;
std::string NullThunkSymbolName;
public:
ObjectFactory(StringRef S, MachineTypes M)
- : Machine(M), DLLName(S), Library(S.drop_back(4)),
+ : Machine(M), ImportName(S), Library(S.drop_back(4)),
ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()),
NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {}
@@ -162,14 +162,17 @@ public:
// Library Format.
NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal,
ImportType Type, ImportNameType NameType);
+
+ // Create a weak external file which is described in PE/COFF Aux Format 3.
+ NewArchiveMember createWeakExternal(StringRef Sym, StringRef Weak, bool Imp);
};
} // namespace
NewArchiveMember
ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
- static const uint32_t NumberOfSections = 2;
- static const uint32_t NumberOfSymbols = 7;
- static const uint32_t NumberOfRelocations = 3;
+ const uint32_t NumberOfSections = 2;
+ const uint32_t NumberOfSymbols = 7;
+ const uint32_t NumberOfRelocations = 3;
// COFF Header
coff_file_header Header{
@@ -181,7 +184,7 @@ ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
sizeof(coff_import_directory_table_entry) +
NumberOfRelocations * sizeof(coff_relocation) +
// .idata$4
- (DLLName.size() + 1)),
+ (ImportName.size() + 1)),
u32(NumberOfSymbols),
u16(0),
u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0),
@@ -189,7 +192,7 @@ ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
append(Buffer, Header);
// Section Header Table
- static const coff_section SectionTable[NumberOfSections] = {
+ const coff_section SectionTable[NumberOfSections] = {
{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'},
u32(0),
u32(0),
@@ -205,7 +208,7 @@ ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'},
u32(0),
u32(0),
- u32(DLLName.size() + 1),
+ u32(ImportName.size() + 1),
u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) +
sizeof(coff_import_directory_table_entry) +
NumberOfRelocations * sizeof(coff_relocation)),
@@ -219,12 +222,12 @@ ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
append(Buffer, SectionTable);
// .idata$2
- static const coff_import_directory_table_entry ImportDescriptor{
+ const coff_import_directory_table_entry ImportDescriptor{
u32(0), u32(0), u32(0), u32(0), u32(0),
};
append(Buffer, ImportDescriptor);
- static const coff_relocation RelocationTable[NumberOfRelocations] = {
+ const coff_relocation RelocationTable[NumberOfRelocations] = {
{u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2),
u16(getImgRelRelocation(Machine))},
{u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)),
@@ -236,9 +239,9 @@ ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
// .idata$6
auto S = Buffer.size();
- Buffer.resize(S + DLLName.size() + 1);
- memcpy(&Buffer[S], DLLName.data(), DLLName.size());
- Buffer[S + DLLName.size()] = '\0';
+ Buffer.resize(S + ImportName.size() + 1);
+ memcpy(&Buffer[S], ImportName.data(), ImportName.size());
+ Buffer[S + ImportName.size()] = '\0';
// Symbol Table
coff_symbol16 SymbolTable[NumberOfSymbols] = {
@@ -302,13 +305,13 @@ ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
NullThunkSymbolName});
StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()};
- return {MemoryBufferRef(F, DLLName)};
+ return {MemoryBufferRef(F, ImportName)};
}
NewArchiveMember
ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) {
- static const uint32_t NumberOfSections = 1;
- static const uint32_t NumberOfSymbols = 1;
+ const uint32_t NumberOfSections = 1;
+ const uint32_t NumberOfSymbols = 1;
// COFF Header
coff_file_header Header{
@@ -325,7 +328,7 @@ ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) {
append(Buffer, Header);
// Section Header Table
- static const coff_section SectionTable[NumberOfSections] = {
+ const coff_section SectionTable[NumberOfSections] = {
{{'.', 'i', 'd', 'a', 't', 'a', '$', '3'},
u32(0),
u32(0),
@@ -342,7 +345,7 @@ ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) {
append(Buffer, SectionTable);
// .idata$3
- static const coff_import_directory_table_entry ImportDescriptor{
+ const coff_import_directory_table_entry ImportDescriptor{
u32(0), u32(0), u32(0), u32(0), u32(0),
};
append(Buffer, ImportDescriptor);
@@ -363,12 +366,12 @@ ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) {
writeStringTable(Buffer, {NullImportDescriptorSymbolName});
StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()};
- return {MemoryBufferRef(F, DLLName)};
+ return {MemoryBufferRef(F, ImportName)};
}
NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
- static const uint32_t NumberOfSections = 2;
- static const uint32_t NumberOfSymbols = 1;
+ const uint32_t NumberOfSections = 2;
+ const uint32_t NumberOfSymbols = 1;
uint32_t VASize = is32bit(Machine) ? 4 : 8;
// COFF Header
@@ -388,7 +391,7 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
append(Buffer, Header);
// Section Header Table
- static const coff_section SectionTable[NumberOfSections] = {
+ const coff_section SectionTable[NumberOfSections] = {
{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'},
u32(0),
u32(0),
@@ -445,14 +448,14 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
writeStringTable(Buffer, {NullThunkSymbolName});
StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()};
- return {MemoryBufferRef{F, DLLName}};
+ return {MemoryBufferRef{F, ImportName}};
}
NewArchiveMember ObjectFactory::createShortImport(StringRef Sym,
uint16_t Ordinal,
ImportType ImportType,
ImportNameType NameType) {
- size_t ImpSize = DLLName.size() + Sym.size() + 2; // +2 for NULs
+ size_t ImpSize = ImportName.size() + Sym.size() + 2; // +2 for NULs
size_t Size = sizeof(coff_import_header) + ImpSize;
char *Buf = Alloc.Allocate<char>(Size);
memset(Buf, 0, Size);
@@ -471,17 +474,96 @@ NewArchiveMember ObjectFactory::createShortImport(StringRef Sym,
// Write symbol name and DLL name.
memcpy(P, Sym.data(), Sym.size());
P += Sym.size() + 1;
- memcpy(P, DLLName.data(), DLLName.size());
+ memcpy(P, ImportName.data(), ImportName.size());
- return {MemoryBufferRef(StringRef(Buf, Size), DLLName)};
+ return {MemoryBufferRef(StringRef(Buf, Size), ImportName)};
}
-std::error_code writeImportLibrary(StringRef DLLName, StringRef Path,
+NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym,
+ StringRef Weak, bool Imp) {
+ std::vector<uint8_t> Buffer;
+ const uint32_t NumberOfSections = 1;
+ const uint32_t NumberOfSymbols = 5;
+
+ // COFF Header
+ coff_file_header Header{
+ u16(0),
+ u16(NumberOfSections),
+ u32(0),
+ u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section))),
+ u32(NumberOfSymbols),
+ u16(0),
+ u16(0),
+ };
+ append(Buffer, Header);
+
+ // Section Header Table
+ const coff_section SectionTable[NumberOfSections] = {
+ {{'.', 'd', 'r', 'e', 'c', 't', 'v', 'e'},
+ u32(0),
+ u32(0),
+ u32(0),
+ u32(0),
+ u32(0),
+ u32(0),
+ u16(0),
+ u16(0),
+ u32(IMAGE_SCN_LNK_INFO | IMAGE_SCN_LNK_REMOVE)}};
+ append(Buffer, SectionTable);
+
+ // Symbol Table
+ coff_symbol16 SymbolTable[NumberOfSymbols] = {
+ {{{'@', 'c', 'o', 'm', 'p', '.', 'i', 'd'}},
+ u32(0),
+ u16(0xFFFF),
+ u16(0),
+ IMAGE_SYM_CLASS_STATIC,
+ 0},
+ {{{'@', 'f', 'e', 'a', 't', '.', '0', '0'}},
+ u32(0),
+ u16(0xFFFF),
+ u16(0),
+ IMAGE_SYM_CLASS_STATIC,
+ 0},
+ {{{0, 0, 0, 0, 0, 0, 0, 0}},
+ u32(0),
+ u16(0),
+ u16(0),
+ IMAGE_SYM_CLASS_EXTERNAL,
+ 0},
+ {{{0, 0, 0, 0, 0, 0, 0, 0}},
+ u32(0),
+ u16(0),
+ u16(0),
+ IMAGE_SYM_CLASS_WEAK_EXTERNAL,
+ 1},
+ {{{2, 0, 0, 0, 3, 0, 0, 0}}, u32(0), u16(0), u16(0), uint8_t(0), 0},
+ };
+ SymbolTable[2].Name.Offset.Offset = sizeof(uint32_t);
+
+ // __imp_ String Table
+ if (Imp) {
+ SymbolTable[3].Name.Offset.Offset = sizeof(uint32_t) + Sym.size() + 7;
+ writeStringTable(Buffer, {std::string("__imp_").append(Sym),
+ std::string("__imp_").append(Weak)});
+ } else {
+ SymbolTable[3].Name.Offset.Offset = sizeof(uint32_t) + Sym.size() + 1;
+ writeStringTable(Buffer, {Sym, Weak});
+ }
+ append(Buffer, SymbolTable);
+
+ // Copied here so we can still use writeStringTable
+ char *Buf = Alloc.Allocate<char>(Buffer.size());
+ memcpy(Buf, Buffer.data(), Buffer.size());
+ return {MemoryBufferRef(StringRef(Buf, Buffer.size()), ImportName)};
+}
+
+std::error_code writeImportLibrary(StringRef ImportName, StringRef Path,
ArrayRef<COFFShortExport> Exports,
MachineTypes Machine) {
std::vector<NewArchiveMember> Members;
- ObjectFactory OF(llvm::sys::path::filename(DLLName), Machine);
+ ObjectFactory OF(llvm::sys::path::filename(ImportName), Machine);
std::vector<uint8_t> ImportDescriptor;
Members.push_back(OF.createImportDescriptor(ImportDescriptor));
@@ -496,6 +578,12 @@ std::error_code writeImportLibrary(StringRef DLLName, StringRef Path,
if (E.Private)
continue;
+ if (E.isWeak()) {
+ Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, false));
+ Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, true));
+ continue;
+ }
+
ImportType ImportType = IMPORT_CODE;
if (E.Data)
ImportType = IMPORT_DATA;
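
The string-table arithmetic in createWeakExternal above is easiest to see worked
out. A minimal sketch, assuming only the COFF rule that the string table opens
with a 4-byte length field and that each entry is NUL-terminated; Sym and Imp
mirror the parameters above, and the helper name is hypothetical.

  #include <cstdint>
  #include <string>

  // Offset of the second string-table entry (the aliased name): 4 bytes of
  // length field, then the first entry, which is "__imp_" (6 chars) + Sym +
  // '\0' in the Imp case, or just Sym + '\0' otherwise.
  static uint32_t secondEntryOffset(const std::string &Sym, bool Imp) {
    size_t FirstEntry = (Imp ? 6 : 0) + Sym.size() + 1;
    return static_cast<uint32_t>(sizeof(uint32_t) + FirstEntry);
  }
  // secondEntryOffset(Sym, true)  == sizeof(uint32_t) + Sym.size() + 7
  // secondEntryOffset(Sym, false) == sizeof(uint32_t) + Sym.size() + 1
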
diff --git a/lib/Object/COFFModuleDefinition.cpp b/lib/Object/COFFModuleDefinition.cpp
index 0d69cb6b709c..ed9140d1fe08 100644
--- a/lib/Object/COFFModuleDefinition.cpp
+++ b/lib/Object/COFFModuleDefinition.cpp
@@ -22,6 +22,7 @@
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/Error.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm::COFF;
@@ -55,8 +56,10 @@ struct Token {
StringRef Value;
};
-static bool isDecorated(StringRef Sym) {
- return Sym.startswith("_") || Sym.startswith("@") || Sym.startswith("?");
+static bool isDecorated(StringRef Sym, bool MingwDef) {
+ // mingw does not prepend "_".
+ return (!MingwDef && Sym.startswith("_")) || Sym.startswith("@") ||
+ Sym.startswith("?");
}
static Error createError(const Twine &Err) {
@@ -83,6 +86,9 @@ public:
}
case '=':
Buf = Buf.drop_front();
+ // GNU dlltool accepts both = and ==.
+ if (Buf.startswith("="))
+ Buf = Buf.drop_front();
return Token(Equal, "=");
case ',':
Buf = Buf.drop_front();
@@ -120,7 +126,8 @@ private:
class Parser {
public:
- explicit Parser(StringRef S, MachineTypes M) : Lex(S), Machine(M) {}
+ explicit Parser(StringRef S, MachineTypes M, bool B)
+ : Lex(S), Machine(M), MingwDef(B) {}
Expected<COFFModuleDefinition> parse() {
do {
@@ -181,14 +188,17 @@ private:
std::string Name;
if (Error Err = parseName(&Name, &Info.ImageBase))
return Err;
- // Append the appropriate file extension if not already present.
- StringRef Ext = IsDll ? ".dll" : ".exe";
- if (!StringRef(Name).endswith_lower(Ext))
- Name += Ext;
+
+ Info.ImportName = Name;
// Set the output file, but don't override /out if it was already passed.
- if (Info.OutputFile.empty())
+ if (Info.OutputFile.empty()) {
Info.OutputFile = Name;
+ // Append the appropriate file extension if not already present.
+ if (!sys::path::has_extension(Name))
+ Info.OutputFile += IsDll ? ".dll" : ".exe";
+ }
+
return Error::success();
}
case KwVersion:
@@ -213,9 +223,9 @@ private:
}
if (Machine == IMAGE_FILE_MACHINE_I386) {
- if (!isDecorated(E.Name))
+ if (!isDecorated(E.Name, MingwDef))
E.Name = (std::string("_").append(E.Name));
- if (!E.ExtName.empty() && !isDecorated(E.ExtName))
+ if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
E.ExtName = (std::string("_").append(E.ExtName));
}
@@ -308,11 +318,13 @@ private:
std::vector<Token> Stack;
MachineTypes Machine;
COFFModuleDefinition Info;
+ bool MingwDef;
};
Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
- MachineTypes Machine) {
- return Parser(MB.getBuffer(), Machine).parse();
+ MachineTypes Machine,
+ bool MingwDef) {
+ return Parser(MB.getBuffer(), Machine, MingwDef).parse();
}
} // namespace object
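
A minimal sketch of driving the parser with the new MingwDef flag; the buffer
contents, machine type, and surrounding error handling are placeholders, not
part of this change.

  #include "llvm/BinaryFormat/COFF.h"
  #include "llvm/Object/COFFModuleDefinition.h"
  #include "llvm/Support/MemoryBuffer.h"
  using namespace llvm;
  using namespace llvm::object;

  static Expected<COFFModuleDefinition> parseDef(StringRef Source) {
    std::unique_ptr<MemoryBuffer> MB =
        MemoryBuffer::getMemBuffer(Source, "<def>");
    // MingwDef=true keeps a leading '_' from being treated as an existing
    // decoration, so i386 exports still get the '_' prefix prepended.
    return parseCOFFModuleDefinition(MB->getMemBufferRef(),
                                     COFF::IMAGE_FILE_MACHINE_I386,
                                     /*MingwDef=*/true);
  }
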
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 1e9b0c5b0454..0a2053477caf 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -227,8 +227,11 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const {
if (Symb.isExternal() || Symb.isWeakExternal())
Result |= SymbolRef::SF_Global;
- if (Symb.isWeakExternal())
+ if (Symb.isWeakExternal()) {
Result |= SymbolRef::SF_Weak;
+ // We use the indirect flag to allow the archiver to write weak externals.
+ Result |= SymbolRef::SF_Indirect;
+ }
if (Symb.getSectionNumber() == COFF::IMAGE_SYM_ABSOLUTE)
Result |= SymbolRef::SF_Absolute;
diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
index 0b2ea61c5fe0..81046b217862 100644
--- a/lib/ObjectYAML/CodeViewYAMLTypes.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
@@ -141,6 +141,33 @@ template <typename T> struct MemberRecordImpl : public MemberRecordBase {
} // end namespace CodeViewYAML
} // end namespace llvm
+void ScalarTraits<GUID>::output(const GUID &G, void *, llvm::raw_ostream &OS) {
+ OS << G;
+}
+
+StringRef ScalarTraits<GUID>::input(StringRef Scalar, void *Ctx, GUID &S) {
+ if (Scalar.size() != 38)
+ return "GUID strings are 38 characters long";
+ if (Scalar[0] != '{' || Scalar[37] != '}')
+ return "GUID is not enclosed in {}";
+ if (Scalar[9] != '-' || Scalar[14] != '-' || Scalar[19] != '-' ||
+ Scalar[24] != '-')
+ return "GUID sections are not properly delineated with dashes";
+
+ uint8_t *OutBuffer = S.Guid;
+ for (auto Iter = Scalar.begin(); Iter != Scalar.end();) {
+ if (*Iter == '-' || *Iter == '{' || *Iter == '}') {
+ ++Iter;
+ continue;
+ }
+ uint8_t Value = (llvm::hexDigitValue(*Iter++) << 4);
+ Value |= llvm::hexDigitValue(*Iter++);
+ *OutBuffer++ = Value;
+ }
+
+ return "";
+}
+
void ScalarTraits<TypeIndex>::output(const TypeIndex &S, void *,
raw_ostream &OS) {
OS << S.getIndex();
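
The GUID reader above expects a 38-character string of the form "{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}" and packs the 32 remaining hex digits into 16 bytes, two digits per byte. A standalone sketch of the same packing, with hypothetical names and no validation:

  #include <cctype>
  #include <cstdint>
  #include <string>
  #include <vector>

  static std::vector<uint8_t> packGuid(const std::string &S) {
    auto hex = [](char C) -> uint8_t {
      return std::isdigit(static_cast<unsigned char>(C))
                 ? C - '0'
                 : std::toupper(static_cast<unsigned char>(C)) - 'A' + 10;
    };
    std::vector<uint8_t> Out;
    for (size_t I = 0; I < S.size();) {
      if (S[I] == '{' || S[I] == '}' || S[I] == '-') { ++I; continue; }
      Out.push_back(static_cast<uint8_t>((hex(S[I]) << 4) | hex(S[I + 1])));
      I += 2;
    }
    return Out; // e.g. "{3F2504E0-...}" -> {0x3F, 0x25, 0x04, 0xE0, ...}
  }
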
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index bcd365236e46..f3b438e829d6 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -390,27 +390,29 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) {
return Name;
}
+namespace {
+struct OptionInfo {
+ std::string Name;
+ StringRef HelpText;
+};
+} // namespace
+
static void PrintHelpOptionList(raw_ostream &OS, StringRef Title,
- std::vector<std::pair<std::string,
- const char*>> &OptionHelp) {
+ std::vector<OptionInfo> &OptionHelp) {
OS << Title << ":\n";
// Find the maximum option length.
unsigned OptionFieldWidth = 0;
for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) {
- // Skip titles.
- if (!OptionHelp[i].second)
- continue;
-
// Limit the amount of padding we are willing to give up for alignment.
- unsigned Length = OptionHelp[i].first.size();
+ unsigned Length = OptionHelp[i].Name.size();
if (Length <= 23)
OptionFieldWidth = std::max(OptionFieldWidth, Length);
}
const unsigned InitialPad = 2;
for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) {
- const std::string &Option = OptionHelp[i].first;
+ const std::string &Option = OptionHelp[i].Name;
int Pad = OptionFieldWidth - int(Option.size());
OS.indent(InitialPad) << Option;
@@ -419,7 +421,7 @@ static void PrintHelpOptionList(raw_ostream &OS, StringRef Title,
OS << "\n";
Pad = OptionFieldWidth + InitialPad;
}
- OS.indent(Pad + 1) << OptionHelp[i].second << '\n';
+ OS.indent(Pad + 1) << OptionHelp[i].HelpText << '\n';
}
}
@@ -458,8 +460,7 @@ void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title,
// Render help text into a map of group-name to a list of (option, help)
// pairs.
- using helpmap_ty =
- std::map<std::string, std::vector<std::pair<std::string, const char*>>>;
+ using helpmap_ty = std::map<std::string, std::vector<OptionInfo>>;
helpmap_ty GroupedOptionHelp;
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
@@ -478,7 +479,7 @@ void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title,
if (const char *Text = getOptionHelpText(Id)) {
const char *HelpGroup = getOptionHelpGroup(*this, Id);
const std::string &OptName = getOptionHelpName(*this, Id);
- GroupedOptionHelp[HelpGroup].push_back(std::make_pair(OptName, Text));
+ GroupedOptionHelp[HelpGroup].push_back({OptName, Text});
}
}
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index fe69151665c6..2fd4f3ea0d45 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -45,22 +45,36 @@ static void *ErrorHandlerUserData = nullptr;
static fatal_error_handler_t BadAllocErrorHandler = nullptr;
static void *BadAllocErrorHandlerUserData = nullptr;
+#if LLVM_ENABLE_THREADS == 1
// Mutexes to synchronize installing error handlers and calling error handlers.
// Do not use ManagedStatic, or that may allocate memory while attempting to
// report an OOM.
+//
+// This usage of std::mutex has to be conditionalized behind ifdefs because
+// of this script:
+// compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
+// That script attempts to statically link the LLVM symbolizer library with the
+// STL and hide all of its symbols with 'opt -internalize'. To reduce size, it
+// cuts out the threading portions of the hermetic copy of libc++ that it
+// builds. We can remove these ifdefs if that script goes away.
static std::mutex ErrorHandlerMutex;
static std::mutex BadAllocErrorHandlerMutex;
+#endif
void llvm::install_fatal_error_handler(fatal_error_handler_t handler,
void *user_data) {
+#if LLVM_ENABLE_THREADS == 1
std::lock_guard<std::mutex> Lock(ErrorHandlerMutex);
+#endif
assert(!ErrorHandler && "Error handler already registered!\n");
ErrorHandler = handler;
ErrorHandlerUserData = user_data;
}
void llvm::remove_fatal_error_handler() {
+#if LLVM_ENABLE_THREADS == 1
std::lock_guard<std::mutex> Lock(ErrorHandlerMutex);
+#endif
ErrorHandler = nullptr;
ErrorHandlerUserData = nullptr;
}
@@ -83,7 +97,9 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
{
// Only acquire the mutex while reading the handler, so as not to invoke a
// user-supplied callback under a lock.
+#if LLVM_ENABLE_THREADS == 1
std::lock_guard<std::mutex> Lock(ErrorHandlerMutex);
+#endif
handler = ErrorHandler;
handlerData = ErrorHandlerUserData;
}
@@ -112,14 +128,18 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler,
void *user_data) {
+#if LLVM_ENABLE_THREADS == 1
std::lock_guard<std::mutex> Lock(BadAllocErrorHandlerMutex);
+#endif
assert(!ErrorHandler && "Bad alloc error handler already registered!\n");
BadAllocErrorHandler = handler;
BadAllocErrorHandlerUserData = user_data;
}
void llvm::remove_bad_alloc_error_handler() {
+#if LLVM_ENABLE_THREADS == 1
std::lock_guard<std::mutex> Lock(BadAllocErrorHandlerMutex);
+#endif
BadAllocErrorHandler = nullptr;
BadAllocErrorHandlerUserData = nullptr;
}
@@ -130,7 +150,9 @@ void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) {
{
// Only acquire the mutex while reading the handler, so as not to invoke a
// user-supplied callback under a lock.
+#if LLVM_ENABLE_THREADS == 1
std::lock_guard<std::mutex> Lock(BadAllocErrorHandlerMutex);
+#endif
Handler = BadAllocErrorHandler;
HandlerData = BadAllocErrorHandlerUserData;
}
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 9f22f89b3c9e..5cf0316d4d71 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -250,6 +250,8 @@ StringRef sys::detail::getHostCPUNameForS390x(
Pos += sizeof("machine = ") - 1;
unsigned int Id;
if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
+ if (Id >= 3906 && HaveVectorSupport)
+ return "z14";
if (Id >= 2964 && HaveVectorSupport)
return "z13";
if (Id >= 2827)
@@ -460,8 +462,8 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_X64)
#if defined(__GNUC__) || defined(__clang__)
+#if defined(__x86_64__)
// gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
// FIXME: should we save this for Clang?
__asm__("movq\t%%rbx, %%rsi\n\t"
@@ -470,43 +472,24 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
return false;
-#elif defined(_MSC_VER)
- int registers[4];
- __cpuidex(registers, value, subleaf);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
- return false;
-#else
- return true;
-#endif
-#elif defined(__i386__) || defined(_M_IX86)
-#if defined(__GNUC__) || defined(__clang__)
+#elif defined(__i386__)
__asm__("movl\t%%ebx, %%esi\n\t"
"cpuid\n\t"
"xchgl\t%%ebx, %%esi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
return false;
-#elif defined(_MSC_VER)
- __asm {
- mov eax,value
- mov ecx,subleaf
- cpuid
- mov esi,rEAX
- mov dword ptr [esi],eax
- mov esi,rEBX
- mov dword ptr [esi],ebx
- mov esi,rECX
- mov dword ptr [esi],ecx
- mov esi,rEDX
- mov dword ptr [esi],edx
- }
- return false;
#else
return true;
#endif
+#elif defined(_MSC_VER)
+ int registers[4];
+ __cpuidex(registers, value, subleaf);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
#else
return true;
#endif
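
A minimal sketch of how a caller inside Host.cpp can use the restructured
helper, e.g. to test for AVX2 (CPUID leaf 7, sub-leaf 0, EBX bit 5). The helper
is file-static and returns true when CPUID is unavailable, so the result has to
be checked first; the wrapper name is hypothetical.

  static bool hostHasAVX2() {
    unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
    if (getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX))
      return false; // CPUID not supported on this build/target
    return (EBX >> 5) & 1;
  }
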
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index e58f856ca244..ea59ba62d7bd 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -13,8 +13,6 @@
#include "llvm/Support/Path.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/BinaryFormat/COFF.h"
-#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp
index 13bb6f23bc83..e8ef1d2fd8b9 100644
--- a/lib/Support/TargetParser.cpp
+++ b/lib/Support/TargetParser.cpp
@@ -452,6 +452,8 @@ bool llvm::AArch64::getExtensionFeatures(unsigned Extensions,
Features.push_back("+ras");
if (Extensions & AArch64::AEK_LSE)
Features.push_back("+lse");
+ if (Extensions & AArch64::AEK_SVE)
+ Features.push_back("+sve");
return true;
}
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index 601084f9eae3..65eda246a7fe 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -60,6 +60,14 @@ Input::Input(StringRef InputContent, void *Ctxt,
DocIterator = Strm->begin();
}
+Input::Input(MemoryBufferRef Input, void *Ctxt,
+ SourceMgr::DiagHandlerTy DiagHandler, void *DiagHandlerCtxt)
+ : IO(Ctxt), Strm(new Stream(Input, SrcMgr, false, &EC)) {
+ if (DiagHandler)
+ SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt);
+ DocIterator = Strm->begin();
+}
+
Input::~Input() = default;
std::error_code Input::error() { return EC; }
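
A minimal usage sketch of the new MemoryBufferRef constructor; the buffer and
the mapped type are placeholders, and the trailing context/diagnostic arguments
are passed explicitly to match the signature above.

  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/Support/YAMLTraits.h"
  using namespace llvm;

  static void parseYaml(MemoryBufferRef Buf) {
    // Diagnostics emitted through the source manager now carry Buf's name.
    yaml::Input YIn(Buf, /*Ctxt=*/nullptr, /*DiagHandler=*/nullptr,
                    /*DiagHandlerCtxt=*/nullptr);
    // SomeMappedType Obj;   // any type with MappingTraits defined
    // YIn >> Obj;
    // if (YIn.error()) { /* handle parse failure */ }
  }
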
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 9480cd46d28f..dd58eccee957 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -326,13 +326,30 @@ raw_ostream &raw_ostream::operator<<(const formatv_object_base &Obj) {
}
raw_ostream &raw_ostream::operator<<(const FormattedString &FS) {
- unsigned Len = FS.Str.size();
- int PadAmount = FS.Width - Len;
- if (FS.RightJustify && (PadAmount > 0))
- this->indent(PadAmount);
- this->operator<<(FS.Str);
- if (!FS.RightJustify && (PadAmount > 0))
+ if (FS.Str.size() >= FS.Width || FS.Justify == FormattedString::JustifyNone) {
+ this->operator<<(FS.Str);
+ return *this;
+ }
+ const size_t Difference = FS.Width - FS.Str.size();
+ switch (FS.Justify) {
+ case FormattedString::JustifyLeft:
+ this->operator<<(FS.Str);
+ this->indent(Difference);
+ break;
+ case FormattedString::JustifyRight:
+ this->indent(Difference);
+ this->operator<<(FS.Str);
+ break;
+ case FormattedString::JustifyCenter: {
+ int PadAmount = Difference / 2;
this->indent(PadAmount);
+ this->operator<<(FS.Str);
+ this->indent(Difference - PadAmount);
+ break;
+ }
+ default:
+ llvm_unreachable("Bad Justification");
+ }
return *this;
}
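
A minimal sketch of the three justification modes handled above, assuming the
left_justify/right_justify helpers from llvm/Support/Format.h and a
center_justify helper paired with the new JustifyCenter mode.

  #include "llvm/Support/Format.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  static void demo(raw_ostream &OS) {
    OS << left_justify("lhs", 10) << "|\n";   // "lhs" then 7 spaces of padding
    OS << right_justify("rhs", 10) << "|\n";  // 7 spaces of padding, then "rhs"
    OS << center_justify("mid", 10) << "|\n"; // 3 spaces, "mid", then 4 spaces
  }
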
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index 37b9690d0434..1dda746a6be1 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -44,6 +44,8 @@ ModulePass *createAArch64PromoteConstantPass();
FunctionPass *createAArch64ConditionOptimizerPass();
FunctionPass *createAArch64A57FPLoadBalancing();
FunctionPass *createAArch64A53Fix835769();
+FunctionPass *createFalkorHWPFFixPass();
+FunctionPass *createFalkorMarkStridedAccessesPass();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
@@ -66,6 +68,8 @@ void initializeAArch64VectorByElementOptPass(PassRegistry&);
void initializeAArch64PromoteConstantPass(PassRegistry&);
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
void initializeAArch64StorePairSuppressPass(PassRegistry&);
+void initializeFalkorHWPFFixPass(PassRegistry&);
+void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
void initializeLDTLSCleanupPass(PassRegistry&);
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 53eef79c4df3..436bf1193304 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -50,6 +50,9 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true",
"Enable Statistical Profiling extension">;
+def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
+ "Enable Scalable Vector Extension (SVE) instructions">;
+
/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
@@ -269,6 +272,7 @@ def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
+ FeatureFuseAES,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index 938779d23690..291bc5ea858e 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -118,6 +118,13 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
]>;
+// Vararg functions on Windows pass floats in integer registers.
+def CC_AArch64_Win64_VarArg : CallingConv<[
+ CCIfType<[f16, f32], CCPromoteToType<f64>>,
+ CCIfType<[f64], CCBitConvertToType<i64>>,
+ CCDelegateTo<CC_AArch64_AAPCS>
+]>;
+
// Darwin uses a calling convention which differs in only two ways
// from the standard one at this level:
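
The Win64 vararg convention above first widens f16/f32 to f64, then reinterprets
the f64 bits as an i64 before delegating to AAPCS, so variadic floating-point
arguments travel in general-purpose registers. A minimal sketch of the effect
(the helper name is hypothetical):

  #include <cstdint>
  #include <cstring>

  static uint64_t winVarArgFloatBits(float F) {
    double D = F;                         // CCPromoteToType<f64>
    uint64_t Bits;
    std::memcpy(&Bits, &D, sizeof(Bits)); // CCBitConvertToType<i64>
    return Bits;                          // then assigned like any integer by AAPCS
  }
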
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index ee54550c9900..b72f23b109d9 100644
--- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -102,6 +102,10 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
case AArch64::LDADDALh:
case AArch64::LDADDALs:
case AArch64::LDADDALd:
+ case AArch64::LDCLRALb:
+ case AArch64::LDCLRALh:
+ case AArch64::LDCLRALs:
+ case AArch64::LDCLRALd:
case AArch64::LDEORALb:
case AArch64::LDEORALh:
case AArch64::LDEORALs:
diff --git a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
new file mode 100644
index 000000000000..c0e22355a9ff
--- /dev/null
+++ b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -0,0 +1,790 @@
+//===-- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file For Falkor, we want to avoid HW prefetcher instruction tag collisions
+/// that may inhibit the HW prefetching. This is done in two steps. Before
+/// ISel, we mark strided loads (i.e. those that will likely benefit from
+/// prefetching) with metadata. Then, after opcodes have been finalized, we
+/// insert MOVs and re-write loads to prevent unintentional tag collisions.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "falkor-hwpf-fix"
+
+STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked");
+STATISTIC(NumCollisionsAvoided,
+ "Number of HW prefetch tag collisions avoided");
+STATISTIC(NumCollisionsNotAvoided,
+ "Number of HW prefetch tag collisions not avoided due to lack of regsiters");
+
+namespace {
+
+class FalkorMarkStridedAccesses {
+public:
+ FalkorMarkStridedAccesses(LoopInfo &LI, ScalarEvolution &SE)
+ : LI(LI), SE(SE) {}
+
+ bool run();
+
+private:
+ bool runOnLoop(Loop &L);
+
+ LoopInfo &LI;
+ ScalarEvolution &SE;
+};
+
+class FalkorMarkStridedAccessesLegacy : public FunctionPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) {
+ initializeFalkorMarkStridedAccessesLegacyPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ // FIXME: For some reason, preserving SE here breaks LSR (even if
+ // this pass changes nothing).
+ // AU.addPreserved<ScalarEvolutionWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+} // namespace
+
+char FalkorMarkStridedAccessesLegacy::ID = 0;
+INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
+ "Falkor HW Prefetch Fix", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
+ "Falkor HW Prefetch Fix", false, false)
+
+FunctionPass *llvm::createFalkorMarkStridedAccessesPass() {
+ return new FalkorMarkStridedAccessesLegacy();
+}
+
+bool FalkorMarkStridedAccessesLegacy::runOnFunction(Function &F) {
+ TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ const AArch64Subtarget *ST =
+ TPC.getTM<AArch64TargetMachine>().getSubtargetImpl(F);
+ if (ST->getProcFamily() != AArch64Subtarget::Falkor)
+ return false;
+
+ if (skipFunction(F))
+ return false;
+
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+ FalkorMarkStridedAccesses LDP(LI, SE);
+ return LDP.run();
+}
+
+bool FalkorMarkStridedAccesses::run() {
+ bool MadeChange = false;
+
+ for (Loop *L : LI)
+ for (auto LIt = df_begin(L), LE = df_end(L); LIt != LE; ++LIt)
+ MadeChange |= runOnLoop(**LIt);
+
+ return MadeChange;
+}
+
+bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) {
+ // Only mark strided loads in the innermost loop.
+ if (!L.empty())
+ return false;
+
+ bool MadeChange = false;
+
+ for (BasicBlock *BB : L.blocks()) {
+ for (Instruction &I : *BB) {
+ LoadInst *LoadI = dyn_cast<LoadInst>(&I);
+ if (!LoadI)
+ continue;
+
+ Value *PtrValue = LoadI->getPointerOperand();
+ if (L.isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE.getSCEV(PtrValue);
+ const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
+ continue;
+
+ LoadI->setMetadata(FALKOR_STRIDED_ACCESS_MD,
+ MDNode::get(LoadI->getContext(), {}));
+ ++NumStridedLoadsMarked;
+ DEBUG(dbgs() << "Load: " << I << " marked as strided\n");
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+namespace {
+
+class FalkorHWPFFix : public MachineFunctionPass {
+public:
+ static char ID;
+
+ FalkorHWPFFix() : MachineFunctionPass(ID) {
+ initializeFalkorHWPFFixPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ void runOnLoop(MachineLoop &L, MachineFunction &Fn);
+
+ const AArch64InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ DenseMap<unsigned, SmallVector<MachineInstr *, 4>> TagMap;
+ bool Modified;
+};
+
+/// Bits from load opcodes used to compute HW prefetcher instruction tags.
+struct LoadInfo {
+ LoadInfo()
+ : DestReg(0), BaseReg(0), BaseRegIdx(-1), OffsetOpnd(nullptr),
+ IsPrePost(false) {}
+ unsigned DestReg;
+ unsigned BaseReg;
+ int BaseRegIdx;
+ const MachineOperand *OffsetOpnd;
+ bool IsPrePost;
+};
+
+} // namespace
+
+char FalkorHWPFFix::ID = 0;
+
+INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "falkor-hwpf-fix-late",
+ "Falkor HW Prefetch Fix Late Phase", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(FalkorHWPFFix, "falkor-hwpf-fix-late",
+ "Falkor HW Prefetch Fix Late Phase", false, false)
+
+static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) {
+ return (Dest & 0xf) | ((Base & 0xf) << 4) | ((Offset & 0x3f) << 8);
+}
+
+static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
+ int DestRegIdx;
+ int BaseRegIdx;
+ int OffsetIdx;
+ bool IsPrePost;
+
+ switch (MI.getOpcode()) {
+ default:
+ return None;
+
+ case AArch64::LD1i8:
+ case AArch64::LD1i16:
+ case AArch64::LD1i32:
+ case AArch64::LD1i64:
+ case AArch64::LD2i8:
+ case AArch64::LD2i16:
+ case AArch64::LD2i32:
+ case AArch64::LD2i64:
+ case AArch64::LD3i8:
+ case AArch64::LD3i16:
+ case AArch64::LD3i32:
+ case AArch64::LD4i8:
+ case AArch64::LD4i16:
+ case AArch64::LD4i32:
+ DestRegIdx = 0;
+ BaseRegIdx = 3;
+ OffsetIdx = -1;
+ IsPrePost = false;
+ break;
+
+ case AArch64::LD3i64:
+ case AArch64::LD4i64:
+ DestRegIdx = -1;
+ BaseRegIdx = 3;
+ OffsetIdx = -1;
+ IsPrePost = false;
+ break;
+
+ case AArch64::LD1Onev1d:
+ case AArch64::LD1Onev2s:
+ case AArch64::LD1Onev4h:
+ case AArch64::LD1Onev8b:
+ case AArch64::LD1Onev2d:
+ case AArch64::LD1Onev4s:
+ case AArch64::LD1Onev8h:
+ case AArch64::LD1Onev16b:
+ case AArch64::LD1Rv1d:
+ case AArch64::LD1Rv2s:
+ case AArch64::LD1Rv4h:
+ case AArch64::LD1Rv8b:
+ case AArch64::LD1Rv2d:
+ case AArch64::LD1Rv4s:
+ case AArch64::LD1Rv8h:
+ case AArch64::LD1Rv16b:
+ case AArch64::LD1Twov1d:
+ case AArch64::LD1Twov2s:
+ case AArch64::LD1Twov4h:
+ case AArch64::LD1Twov8b:
+ case AArch64::LD2Twov2s:
+ case AArch64::LD2Twov4s:
+ case AArch64::LD2Twov8b:
+ case AArch64::LD2Rv1d:
+ case AArch64::LD2Rv2s:
+ case AArch64::LD2Rv4s:
+ case AArch64::LD2Rv8b:
+ DestRegIdx = 0;
+ BaseRegIdx = 1;
+ OffsetIdx = -1;
+ IsPrePost = false;
+ break;
+
+ case AArch64::LD1Twov2d:
+ case AArch64::LD1Twov4s:
+ case AArch64::LD1Twov8h:
+ case AArch64::LD1Twov16b:
+ case AArch64::LD1Threev1d:
+ case AArch64::LD1Threev2s:
+ case AArch64::LD1Threev4h:
+ case AArch64::LD1Threev8b:
+ case AArch64::LD1Threev2d:
+ case AArch64::LD1Threev4s:
+ case AArch64::LD1Threev8h:
+ case AArch64::LD1Threev16b:
+ case AArch64::LD1Fourv1d:
+ case AArch64::LD1Fourv2s:
+ case AArch64::LD1Fourv4h:
+ case AArch64::LD1Fourv8b:
+ case AArch64::LD1Fourv2d:
+ case AArch64::LD1Fourv4s:
+ case AArch64::LD1Fourv8h:
+ case AArch64::LD1Fourv16b:
+ case AArch64::LD2Twov2d:
+ case AArch64::LD2Twov4h:
+ case AArch64::LD2Twov8h:
+ case AArch64::LD2Twov16b:
+ case AArch64::LD2Rv2d:
+ case AArch64::LD2Rv4h:
+ case AArch64::LD2Rv8h:
+ case AArch64::LD2Rv16b:
+ case AArch64::LD3Threev2s:
+ case AArch64::LD3Threev4h:
+ case AArch64::LD3Threev8b:
+ case AArch64::LD3Threev2d:
+ case AArch64::LD3Threev4s:
+ case AArch64::LD3Threev8h:
+ case AArch64::LD3Threev16b:
+ case AArch64::LD3Rv1d:
+ case AArch64::LD3Rv2s:
+ case AArch64::LD3Rv4h:
+ case AArch64::LD3Rv8b:
+ case AArch64::LD3Rv2d:
+ case AArch64::LD3Rv4s:
+ case AArch64::LD3Rv8h:
+ case AArch64::LD3Rv16b:
+ case AArch64::LD4Fourv2s:
+ case AArch64::LD4Fourv4h:
+ case AArch64::LD4Fourv8b:
+ case AArch64::LD4Fourv2d:
+ case AArch64::LD4Fourv4s:
+ case AArch64::LD4Fourv8h:
+ case AArch64::LD4Fourv16b:
+ case AArch64::LD4Rv1d:
+ case AArch64::LD4Rv2s:
+ case AArch64::LD4Rv4h:
+ case AArch64::LD4Rv8b:
+ case AArch64::LD4Rv2d:
+ case AArch64::LD4Rv4s:
+ case AArch64::LD4Rv8h:
+ case AArch64::LD4Rv16b:
+ DestRegIdx = -1;
+ BaseRegIdx = 1;
+ OffsetIdx = -1;
+ IsPrePost = false;
+ break;
+
+ case AArch64::LD1i8_POST:
+ case AArch64::LD1i16_POST:
+ case AArch64::LD1i32_POST:
+ case AArch64::LD1i64_POST:
+ case AArch64::LD2i8_POST:
+ case AArch64::LD2i16_POST:
+ case AArch64::LD2i32_POST:
+ case AArch64::LD2i64_POST:
+ case AArch64::LD3i8_POST:
+ case AArch64::LD3i16_POST:
+ case AArch64::LD3i32_POST:
+ case AArch64::LD4i8_POST:
+ case AArch64::LD4i16_POST:
+ case AArch64::LD4i32_POST:
+ DestRegIdx = 1;
+ BaseRegIdx = 4;
+ OffsetIdx = 5;
+ IsPrePost = false;
+ break;
+
+ case AArch64::LD3i64_POST:
+ case AArch64::LD4i64_POST:
+ DestRegIdx = -1;
+ BaseRegIdx = 4;
+ OffsetIdx = 5;
+ IsPrePost = false;
+ break;
+
+ case AArch64::LD1Onev1d_POST:
+ case AArch64::LD1Onev2s_POST:
+ case AArch64::LD1Onev4h_POST:
+ case AArch64::LD1Onev8b_POST:
+ case AArch64::LD1Onev2d_POST:
+ case AArch64::LD1Onev4s_POST:
+ case AArch64::LD1Onev8h_POST:
+ case AArch64::LD1Onev16b_POST:
+ case AArch64::LD1Rv1d_POST:
+ case AArch64::LD1Rv2s_POST:
+ case AArch64::LD1Rv4h_POST:
+ case AArch64::LD1Rv8b_POST:
+ case AArch64::LD1Rv2d_POST:
+ case AArch64::LD1Rv4s_POST:
+ case AArch64::LD1Rv8h_POST:
+ case AArch64::LD1Rv16b_POST:
+ case AArch64::LD1Twov1d_POST:
+ case AArch64::LD1Twov2s_POST:
+ case AArch64::LD1Twov4h_POST:
+ case AArch64::LD1Twov8b_POST:
+ case AArch64::LD2Twov2s_POST:
+ case AArch64::LD2Twov4s_POST:
+ case AArch64::LD2Twov8b_POST:
+ case AArch64::LD2Rv1d_POST:
+ case AArch64::LD2Rv2s_POST:
+ case AArch64::LD2Rv4s_POST:
+ case AArch64::LD2Rv8b_POST:
+ DestRegIdx = 1;
+ BaseRegIdx = 2;
+ OffsetIdx = 3;
+ IsPrePost = false;
+ break;
+
+ case AArch64::LD1Twov2d_POST:
+ case AArch64::LD1Twov4s_POST:
+ case AArch64::LD1Twov8h_POST:
+ case AArch64::LD1Twov16b_POST:
+ case AArch64::LD1Threev1d_POST:
+ case AArch64::LD1Threev2s_POST:
+ case AArch64::LD1Threev4h_POST:
+ case AArch64::LD1Threev8b_POST:
+ case AArch64::LD1Threev2d_POST:
+ case AArch64::LD1Threev4s_POST:
+ case AArch64::LD1Threev8h_POST:
+ case AArch64::LD1Threev16b_POST:
+ case AArch64::LD1Fourv1d_POST:
+ case AArch64::LD1Fourv2s_POST:
+ case AArch64::LD1Fourv4h_POST:
+ case AArch64::LD1Fourv8b_POST:
+ case AArch64::LD1Fourv2d_POST:
+ case AArch64::LD1Fourv4s_POST:
+ case AArch64::LD1Fourv8h_POST:
+ case AArch64::LD1Fourv16b_POST:
+ case AArch64::LD2Twov2d_POST:
+ case AArch64::LD2Twov4h_POST:
+ case AArch64::LD2Twov8h_POST:
+ case AArch64::LD2Twov16b_POST:
+ case AArch64::LD2Rv2d_POST:
+ case AArch64::LD2Rv4h_POST:
+ case AArch64::LD2Rv8h_POST:
+ case AArch64::LD2Rv16b_POST:
+ case AArch64::LD3Threev2s_POST:
+ case AArch64::LD3Threev4h_POST:
+ case AArch64::LD3Threev8b_POST:
+ case AArch64::LD3Threev2d_POST:
+ case AArch64::LD3Threev4s_POST:
+ case AArch64::LD3Threev8h_POST:
+ case AArch64::LD3Threev16b_POST:
+ case AArch64::LD3Rv1d_POST:
+ case AArch64::LD3Rv2s_POST:
+ case AArch64::LD3Rv4h_POST:
+ case AArch64::LD3Rv8b_POST:
+ case AArch64::LD3Rv2d_POST:
+ case AArch64::LD3Rv4s_POST:
+ case AArch64::LD3Rv8h_POST:
+ case AArch64::LD3Rv16b_POST:
+ case AArch64::LD4Fourv2s_POST:
+ case AArch64::LD4Fourv4h_POST:
+ case AArch64::LD4Fourv8b_POST:
+ case AArch64::LD4Fourv2d_POST:
+ case AArch64::LD4Fourv4s_POST:
+ case AArch64::LD4Fourv8h_POST:
+ case AArch64::LD4Fourv16b_POST:
+ case AArch64::LD4Rv1d_POST:
+ case AArch64::LD4Rv2s_POST:
+ case AArch64::LD4Rv4h_POST:
+ case AArch64::LD4Rv8b_POST:
+ case AArch64::LD4Rv2d_POST:
+ case AArch64::LD4Rv4s_POST:
+ case AArch64::LD4Rv8h_POST:
+ case AArch64::LD4Rv16b_POST:
+ DestRegIdx = -1;
+ BaseRegIdx = 2;
+ OffsetIdx = 3;
+ IsPrePost = false;
+ break;
+
+ case AArch64::LDRBBroW:
+ case AArch64::LDRBBroX:
+ case AArch64::LDRBBui:
+ case AArch64::LDRBroW:
+ case AArch64::LDRBroX:
+ case AArch64::LDRBui:
+ case AArch64::LDRDl:
+ case AArch64::LDRDroW:
+ case AArch64::LDRDroX:
+ case AArch64::LDRDui:
+ case AArch64::LDRHHroW:
+ case AArch64::LDRHHroX:
+ case AArch64::LDRHHui:
+ case AArch64::LDRHroW:
+ case AArch64::LDRHroX:
+ case AArch64::LDRHui:
+ case AArch64::LDRQl:
+ case AArch64::LDRQroW:
+ case AArch64::LDRQroX:
+ case AArch64::LDRQui:
+ case AArch64::LDRSBWroW:
+ case AArch64::LDRSBWroX:
+ case AArch64::LDRSBWui:
+ case AArch64::LDRSBXroW:
+ case AArch64::LDRSBXroX:
+ case AArch64::LDRSBXui:
+ case AArch64::LDRSHWroW:
+ case AArch64::LDRSHWroX:
+ case AArch64::LDRSHWui:
+ case AArch64::LDRSHXroW:
+ case AArch64::LDRSHXroX:
+ case AArch64::LDRSHXui:
+ case AArch64::LDRSWl:
+ case AArch64::LDRSWroW:
+ case AArch64::LDRSWroX:
+ case AArch64::LDRSWui:
+ case AArch64::LDRSl:
+ case AArch64::LDRSroW:
+ case AArch64::LDRSroX:
+ case AArch64::LDRSui:
+ case AArch64::LDRWl:
+ case AArch64::LDRWroW:
+ case AArch64::LDRWroX:
+ case AArch64::LDRWui:
+ case AArch64::LDRXl:
+ case AArch64::LDRXroW:
+ case AArch64::LDRXroX:
+ case AArch64::LDRXui:
+ case AArch64::LDURBBi:
+ case AArch64::LDURBi:
+ case AArch64::LDURDi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURHi:
+ case AArch64::LDURQi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSHWi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSWi:
+ case AArch64::LDURSi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi:
+ DestRegIdx = 0;
+ BaseRegIdx = 1;
+ OffsetIdx = 2;
+ IsPrePost = false;
+ break;
+
+ case AArch64::LDRBBpost:
+ case AArch64::LDRBBpre:
+ case AArch64::LDRBpost:
+ case AArch64::LDRBpre:
+ case AArch64::LDRDpost:
+ case AArch64::LDRDpre:
+ case AArch64::LDRHHpost:
+ case AArch64::LDRHHpre:
+ case AArch64::LDRHpost:
+ case AArch64::LDRHpre:
+ case AArch64::LDRQpost:
+ case AArch64::LDRQpre:
+ case AArch64::LDRSBWpost:
+ case AArch64::LDRSBWpre:
+ case AArch64::LDRSBXpost:
+ case AArch64::LDRSBXpre:
+ case AArch64::LDRSHWpost:
+ case AArch64::LDRSHWpre:
+ case AArch64::LDRSHXpost:
+ case AArch64::LDRSHXpre:
+ case AArch64::LDRSWpost:
+ case AArch64::LDRSWpre:
+ case AArch64::LDRSpost:
+ case AArch64::LDRSpre:
+ case AArch64::LDRWpost:
+ case AArch64::LDRWpre:
+ case AArch64::LDRXpost:
+ case AArch64::LDRXpre:
+ DestRegIdx = 1;
+ BaseRegIdx = 2;
+ OffsetIdx = 3;
+ IsPrePost = true;
+ break;
+
+ case AArch64::LDPDi:
+ case AArch64::LDPQi:
+ DestRegIdx = -1;
+ BaseRegIdx = 2;
+ OffsetIdx = 3;
+ IsPrePost = false;
+ break;
+
+ case AArch64::LDPSWi:
+ case AArch64::LDPSi:
+ case AArch64::LDPWi:
+ case AArch64::LDPXi:
+ DestRegIdx = 0;
+ BaseRegIdx = 2;
+ OffsetIdx = 3;
+ IsPrePost = false;
+ break;
+
+ case AArch64::LDPQpost:
+ case AArch64::LDPQpre:
+ DestRegIdx = -1;
+ BaseRegIdx = 3;
+ OffsetIdx = 4;
+ IsPrePost = true;
+ break;
+
+ case AArch64::LDPDpost:
+ case AArch64::LDPDpre:
+ case AArch64::LDPSWpost:
+ case AArch64::LDPSWpre:
+ case AArch64::LDPSpost:
+ case AArch64::LDPSpre:
+ case AArch64::LDPWpost:
+ case AArch64::LDPWpre:
+ case AArch64::LDPXpost:
+ case AArch64::LDPXpre:
+ DestRegIdx = 1;
+ BaseRegIdx = 3;
+ OffsetIdx = 4;
+ IsPrePost = true;
+ break;
+ }
+
+ LoadInfo LI;
+ LI.DestReg = DestRegIdx == -1 ? 0 : MI.getOperand(DestRegIdx).getReg();
+ LI.BaseReg = MI.getOperand(BaseRegIdx).getReg();
+ LI.BaseRegIdx = BaseRegIdx;
+ LI.OffsetOpnd = OffsetIdx == -1 ? nullptr : &MI.getOperand(OffsetIdx);
+ LI.IsPrePost = IsPrePost;
+ return LI;
+}
+
+static Optional<unsigned> getTag(const TargetRegisterInfo *TRI,
+ const MachineInstr &MI, const LoadInfo &LI) {
+ unsigned Dest = LI.DestReg ? TRI->getEncodingValue(LI.DestReg) : 0;
+ unsigned Base = TRI->getEncodingValue(LI.BaseReg);
+ unsigned Off;
+ if (LI.OffsetOpnd == nullptr)
+ Off = 0;
+ else if (LI.OffsetOpnd->isGlobal() || LI.OffsetOpnd->isSymbol() ||
+ LI.OffsetOpnd->isCPI())
+ return None;
+ else if (LI.OffsetOpnd->isReg())
+ Off = (1 << 5) | TRI->getEncodingValue(LI.OffsetOpnd->getReg());
+ else
+ Off = LI.OffsetOpnd->getImm() >> 2;
+
+ return makeTag(Dest, Base, Off);
+}
+
+void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) {
+ // Build the initial tag map for the whole loop.
+ TagMap.clear();
+ for (MachineBasicBlock *MBB : L.getBlocks())
+ for (MachineInstr &MI : *MBB) {
+ Optional<LoadInfo> LInfo = getLoadInfo(MI);
+ if (!LInfo)
+ continue;
+ Optional<unsigned> Tag = getTag(TRI, MI, *LInfo);
+ if (!Tag)
+ continue;
+ TagMap[*Tag].push_back(&MI);
+ }
+
+ bool AnyCollisions = false;
+ for (auto &P : TagMap) {
+ auto Size = P.second.size();
+ if (Size > 1) {
+ for (auto *MI : P.second) {
+ if (TII->isStridedAccess(*MI)) {
+ AnyCollisions = true;
+ break;
+ }
+ }
+ }
+ if (AnyCollisions)
+ break;
+ }
+ // Nothing to fix.
+ if (!AnyCollisions)
+ return;
+
+ MachineRegisterInfo &MRI = Fn.getRegInfo();
+
+ // Go through all the basic blocks in the current loop and fix any strided
+ // loads to avoid collisions with any other loads.
+ LiveRegUnits LR(*TRI);
+ for (MachineBasicBlock *MBB : L.getBlocks()) {
+ LR.clear();
+ LR.addLiveOuts(*MBB);
+ for (auto I = MBB->rbegin(); I != MBB->rend(); LR.stepBackward(*I), ++I) {
+ MachineInstr &MI = *I;
+ if (!TII->isStridedAccess(MI))
+ continue;
+
+ LoadInfo LdI = *getLoadInfo(MI);
+ unsigned OldTag = *getTag(TRI, MI, LdI);
+ auto &OldCollisions = TagMap[OldTag];
+ if (OldCollisions.size() <= 1)
+ continue;
+
+ bool Fixed = false;
+ DEBUG(dbgs() << "Attempting to fix tag collision: " << MI);
+
+ for (unsigned ScratchReg : AArch64::GPR64RegClass) {
+ if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg))
+ continue;
+
+ LoadInfo NewLdI(LdI);
+ NewLdI.BaseReg = ScratchReg;
+ unsigned NewTag = *getTag(TRI, MI, NewLdI);
+ // Scratch reg tag would collide too, so don't use it.
+ if (TagMap.count(NewTag))
+ continue;
+
+ DEBUG(dbgs() << "Changing base reg to: " << PrintReg(ScratchReg, TRI)
+ << '\n');
+
+ // Rewrite:
+ // Xd = LOAD Xb, off
+ // to:
+ // Xc = MOV Xb
+ // Xd = LOAD Xc, off
+ DebugLoc DL = MI.getDebugLoc();
+ BuildMI(*MBB, &MI, DL, TII->get(AArch64::ORRXrs), ScratchReg)
+ .addReg(AArch64::XZR)
+ .addReg(LdI.BaseReg)
+ .addImm(0);
+ MachineOperand &BaseOpnd = MI.getOperand(LdI.BaseRegIdx);
+ BaseOpnd.setReg(ScratchReg);
+
+ // If the load does a pre/post increment, then insert a MOV after as
+ // well to update the real base register.
+ if (LdI.IsPrePost) {
+ DEBUG(dbgs() << "Doing post MOV of incremented reg: "
+ << PrintReg(ScratchReg, TRI) << '\n');
+ MI.getOperand(0).setReg(
+ ScratchReg); // Change tied operand pre/post update dest.
+ BuildMI(*MBB, std::next(MachineBasicBlock::iterator(MI)), DL,
+ TII->get(AArch64::ORRXrs), LdI.BaseReg)
+ .addReg(AArch64::XZR)
+ .addReg(ScratchReg)
+ .addImm(0);
+ }
+
+ for (int I = 0, E = OldCollisions.size(); I != E; ++I)
+ if (OldCollisions[I] == &MI) {
+ std::swap(OldCollisions[I], OldCollisions[E - 1]);
+ OldCollisions.pop_back();
+ break;
+ }
+
+ // Update TagMap to reflect instruction changes to reduce the number
+ // of later MOVs to be inserted. This needs to be done after
+ // OldCollisions is updated since it may be relocated by this
+ // insertion.
+ TagMap[NewTag].push_back(&MI);
+ ++NumCollisionsAvoided;
+ Fixed = true;
+ Modified = true;
+ break;
+ }
+ if (!Fixed)
+ ++NumCollisionsNotAvoided;
+ }
+ }
+}
+
+bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) {
+ auto &ST = static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
+ if (ST.getProcFamily() != AArch64Subtarget::Falkor)
+ return false;
+
+ if (skipFunction(*Fn.getFunction()))
+ return false;
+
+ TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
+ TRI = ST.getRegisterInfo();
+
+ assert(TRI->trackLivenessAfterRegAlloc(Fn) &&
+ "Register liveness not available!");
+
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+
+ Modified = false;
+
+ for (MachineLoop *I : LI)
+ for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
+ // Only process innermost loops.
+ if (L->empty())
+ runOnLoop(**L, Fn);
+
+ return Modified;
+}
+
+FunctionPass *llvm::createFalkorHWPFFixPass() { return new FalkorHWPFFix(); }
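
The prefetcher tag computed by makeTag above keeps only the low 4 bits of the
destination and base register encodings plus a 6-bit offset field, which is why
distinct loads can collide and why the pass retries with a scratch base
register. A minimal compile-time sketch with hypothetical encodings:

  // Copy of makeTag, made constexpr so the collision is checkable statically.
  constexpr unsigned makeTagSketch(unsigned Dest, unsigned Base, unsigned Offset) {
    return (Dest & 0xf) | ((Base & 0xf) << 4) | ((Offset & 0x3f) << 8);
  }
  static_assert(makeTagSketch(3, 1, 2) == 0x213, "dest=3, base=1, offset=2");
  static_assert(makeTagSketch(19, 17, 2) == 0x213,
                "19 and 17 alias 3 and 1 in the low 4 bits, so the tags collide");
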
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 3682b62d2b84..97396057dce0 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -5138,6 +5138,7 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
return selectOperator(I, I->getOpcode());
// Silence warnings.
(void)&CC_AArch64_DarwinPCS_VarArg;
+ (void)&CC_AArch64_Win64_VarArg;
}
namespace llvm {
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index e96ee7d29b3e..4907d082eda0 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -41,6 +41,10 @@
// | |
// |-----------------------------------|
// | |
+// | (Win64 only) varargs from reg |
+// | |
+// |-----------------------------------|
+// | |
// | prev_fp, prev_lr |
// | (a.k.a. "frame record") |
// |-----------------------------------| <- fp(=x29)
@@ -950,7 +954,13 @@ static void computeCalleeSaveRegisterPairs(
CC == CallingConv::PreserveMost ||
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
- unsigned Offset = AFI->getCalleeSavedStackSize();
+ int Offset = AFI->getCalleeSavedStackSize();
+
+ unsigned GPRSaveSize = AFI->getVarArgsGPRSize();
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
+ if (IsWin64)
+ Offset -= alignTo(GPRSaveSize, 16);
for (unsigned i = 0; i < Count; ++i) {
RegPairInfo RPI;
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 04687847c1a3..06005f6b6886 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -239,10 +239,17 @@ bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
case InlineAsm::Constraint_i:
case InlineAsm::Constraint_m:
case InlineAsm::Constraint_Q:
- // Require the address to be in a register. That is safe for all AArch64
- // variants and it is hard to do anything much smarter without knowing
- // how the operand is used.
- OutOps.push_back(Op);
+ // We need to make sure that this one operand does not end up in XZR, thus
+ // require the address to be in a PointerRegClass register.
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
+ SDLoc dl(Op);
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
+ SDValue NewOp =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op.getValueType(),
+ Op, RC), 0);
+ OutOps.push_back(NewOp);
return false;
}
return true;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 60fde5caa339..c6150f9e5d1d 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2650,9 +2650,13 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::PreserveMost:
case CallingConv::CXX_FAST_TLS:
case CallingConv::Swift:
+ if (Subtarget->isTargetWindows() && IsVarArg)
+ return CC_AArch64_Win64_VarArg;
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
+ case CallingConv::Win64:
+ return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
}
}
@@ -2668,6 +2672,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv());
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -2824,10 +2829,12 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// varargs
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
if (isVarArg) {
- if (!Subtarget->isTargetDarwin()) {
+ if (!Subtarget->isTargetDarwin() || IsWin64) {
// The AAPCS variadic function ABI is identical to the non-variadic
// one. As a result there may be more arguments in registers and we should
// save them for future reference.
+ // Win64 variadic functions also pass arguments in registers, but all float
+ // arguments are passed in integer registers.
saveVarArgRegisters(CCInfo, DAG, DL, Chain);
}
@@ -2869,6 +2876,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
auto PtrVT = getPointerTy(DAG.getDataLayout());
+ bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv());
SmallVector<SDValue, 8> MemOps;
@@ -2881,7 +2889,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
if (GPRSaveSize != 0) {
- GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
+ if (IsWin64)
+ GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
+ else
+ GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
@@ -2890,7 +2901,11 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
SDValue Store = DAG.getStore(
Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
+ IsWin64
+ ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
+ GPRIdx,
+ (i - FirstVariadicGPR) * 8)
+ : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
MemOps.push_back(Store);
FIN =
DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
@@ -2899,7 +2914,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
FuncInfo->setVarArgsGPRIndex(GPRIdx);
FuncInfo->setVarArgsGPRSize(GPRSaveSize);
- if (Subtarget->hasFPARMv8()) {
+ if (Subtarget->hasFPARMv8() && !IsWin64) {
static const MCPhysReg FPRArgRegs[] = {
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
@@ -4491,6 +4506,21 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
MachinePointerInfo(SV));
}
+SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ AArch64FunctionInfo *FuncInfo =
+ DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
+
+ SDLoc DL(Op);
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
+ ? FuncInfo->getVarArgsGPRIndex()
+ : FuncInfo->getVarArgsStackIndex(),
+ getPointerTy(DAG.getDataLayout()));
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+ MachinePointerInfo(SV));
+}
+
SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SelectionDAG &DAG) const {
// The layout of the va_list struct is specified in the AArch64 Procedure Call
@@ -4562,8 +4592,14 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
SelectionDAG &DAG) const {
- return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG)
- : LowerAAPCS_VASTART(Op, DAG);
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()))
+ return LowerWin64_VASTART(Op, DAG);
+ else if (Subtarget->isTargetDarwin())
+ return LowerDarwin_VASTART(Op, DAG);
+ else
+ return LowerAAPCS_VASTART(Op, DAG);
}
SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
@@ -4571,7 +4607,8 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
// AAPCS has three pointers and two ints (= 32 bytes), Darwin has single
// pointer.
SDLoc DL(Op);
- unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32;
+ unsigned VaListSize =
+ Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
@@ -7451,6 +7488,14 @@ AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
}
+MachineMemOperand::Flags
+AArch64TargetLowering::getMMOFlags(const Instruction &I) const {
+ if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
+ I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
+ return MOStridedAccess;
+ return MachineMemOperand::MONone;
+}
+
bool AArch64TargetLowering::isLegalInterleavedAccessType(
VectorType *VecTy, const DataLayout &DL) const {
@@ -10567,9 +10612,6 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (Size > 128) return AtomicExpansionKind::None;
// Nand not supported in LSE.
if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
- // Currently leaving And and Sub to LLSC
- if ((AI->getOperation() == AtomicRMWInst::And) || (AI->getOperation() == AtomicRMWInst::Sub))
- return AtomicExpansionKind::LLSC;
// Leave 128 bits to LLSC.
return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
}
@@ -10783,7 +10825,7 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
unsigned
AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
- if (Subtarget->isTargetDarwin())
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
return getPointerTy(DL).getSizeInBits();
return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index ecc2517fb288..3b0e0f1de894 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -408,6 +408,19 @@ public:
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
+ bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+ const SelectionDAG &DAG) const override {
+ // Do not merge stores to a value wider than 64 bits (i.e. into a 128-bit
+ // vector) if the NoImplicitFloat attribute is set.
+
+ bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat);
+
+ if (NoFloat)
+ return (MemVT.getSizeInBits() <= 64);
+ return true;
+ }
+
bool isCheapToSpeculateCttz() const override {
return true;
}
@@ -455,6 +468,8 @@ public:
unsigned getNumInterleavedAccesses(VectorType *VecTy,
const DataLayout &DL) const;
+ MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
+
private:
bool isExtFreeImpl(const Instruction *Ext) const override;
@@ -541,6 +556,7 @@ private:
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
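The new canMergeStoresTo override above stops the DAG combiner from merging neighbouring stores into a single 128-bit store when the function carries noimplicitfloat, since such a store would need an FP/SIMD register. A standalone sketch of the threshold (the helper name is illustrative, not LLVM API):

    #include <cassert>

    // Mirror of the hook's decision, with the merged width given in bits.
    static bool canMergeStoresTo(unsigned MemBits, bool NoImplicitFloat) {
      return NoImplicitFloat ? MemBits <= 64 : true;
    }

    int main() {
      assert(canMergeStoresTo(128, /*NoImplicitFloat=*/false)); // ok: q-register store
      assert(!canMergeStoresTo(128, /*NoImplicitFloat=*/true)); // blocked: needs an FP register
      assert(canMergeStoresTo(64, /*NoImplicitFloat=*/true));   // fine with GPRs alone
      return 0;
    }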
diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td
index de283b70210f..eec41ddbc159 100644
--- a/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -451,3 +451,13 @@ def : Pat<(atomic_swap_8 GPR64:$Rn, GPR32:$Rs), (SWPALb GPR32:$Rs, GPR64sp:$Rn)>
def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALh GPR32:$Rs, GPR64sp:$Rn)>;
def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALs GPR32:$Rs, GPR64sp:$Rn)>;
def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_sub_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
+def : Pat<(atomic_load_sub_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
+def : Pat<(atomic_load_sub_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs (SUBWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
+def : Pat<(atomic_load_sub_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd (SUBXrr XZR, GPR64:$Rs), GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_and_8 GPR64:$Rn, GPR32:$Rs), (LDCLRALb (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
+def : Pat<(atomic_load_and_16 GPR64:$Rn, GPR32:$Rs), (LDCLRALh (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
+def : Pat<(atomic_load_and_32 GPR64:$Rn, GPR32:$Rs), (LDCLRALs (ORNWrr WZR, GPR32:$Rs), GPR64sp:$Rn)>;
+def : Pat<(atomic_load_and_64 GPR64:$Rn, GPR64:$Rs), (LDCLRALd (ORNXrr XZR, GPR64:$Rs), GPR64sp:$Rn)>;
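These patterns let targets with LSE handle atomic subtraction and atomic AND without dedicated instructions: LDADD with a negated operand implements the subtract, and LDCLR, an atomic bit-clear (mem &= ~op), with the complemented mask implements the AND; the SUBWrr/SUBXrr and ORNWrr/ORNXrr forms against the zero register compute -Rs and ~Rs respectively. A standalone check of the two identities, assuming two's-complement wraparound:

    #include <cassert>
    #include <cstdint>

    // Models of the LSE primitives: LDADD returns mem + op, LDCLR returns mem & ~op.
    static uint32_t ldadd(uint32_t Mem, uint32_t Op) { return Mem + Op; }
    static uint32_t ldclr(uint32_t Mem, uint32_t Op) { return Mem & ~Op; }

    int main() {
      uint32_t Mem = 0xF0F0F0F0u, V = 7u, M = 0x0000FFFFu;
      assert(ldadd(Mem, 0u - V) == Mem - V); // atomic sub == atomic add of -V
      assert(ldclr(Mem, ~M) == (Mem & M));   // atomic and == atomic clear of ~M
      return 0;
    }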
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index dba3e4bdf82f..c0c6055c358f 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -52,9 +52,6 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
-static const MachineMemOperand::Flags MOSuppressPair =
- MachineMemOperand::MOTargetFlag1;
-
static cl::opt<unsigned>
TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
@@ -1715,6 +1712,13 @@ void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
(*MI.memoperands_begin())->setFlags(MOSuppressPair);
}
+/// Check all MachineMemOperands for a hint that the load/store is strided.
+bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) const {
+ return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+ return MMO->getFlags() & MOStridedAccess;
+ });
+}
+
bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
switch (Opc) {
default:
@@ -4433,7 +4437,8 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
- {{MOSuppressPair, "aarch64-suppress-pair"}};
+ {{MOSuppressPair, "aarch64-suppress-pair"},
+ {MOStridedAccess, "aarch64-strided-access"}};
return makeArrayRef(TargetFlags);
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 0809ede4df2a..1765a0263ea4 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -27,6 +27,13 @@ namespace llvm {
class AArch64Subtarget;
class AArch64TargetMachine;
+static const MachineMemOperand::Flags MOSuppressPair =
+ MachineMemOperand::MOTargetFlag1;
+static const MachineMemOperand::Flags MOStridedAccess =
+ MachineMemOperand::MOTargetFlag2;
+
+#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access"
+
class AArch64InstrInfo final : public AArch64GenInstrInfo {
const AArch64RegisterInfo RI;
const AArch64Subtarget &Subtarget;
@@ -81,6 +88,9 @@ public:
/// unprofitable.
bool isLdStPairSuppressed(const MachineInstr &MI) const;
+ /// Return true if the given load or store is a strided memory access.
+ bool isStridedAccess(const MachineInstr &MI) const;
+
/// Return true if this is an unscaled load/store.
bool isUnscaledLdSt(unsigned Opc) const;
@@ -356,7 +366,7 @@ enum AArch64FrameOffsetStatus {
/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to
/// use an offset.
/// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be
-/// rewriten in @p MI.
+/// rewritten in @p MI.
/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the
/// amount that is off the limit of the legal offset.
/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
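The MOStridedAccess flag and the FALKOR_STRIDED_ACCESS_MD string added above tie an IR-level marker to a MachineMemOperand target flag: the new FalkorMarkStridedAccesses IR pass tags qualifying loads with !falkor.strided.access metadata, getMMOFlags (in the AArch64ISelLowering.cpp hunk earlier) converts that into MOStridedAccess, and isStridedAccess queries it on MachineInstrs. A hedged sketch of attaching such a marker, assuming LI is a LoadInst*; an illustration only, not the marking pass itself:

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Metadata.h"

    // Attach the marker to a load; the empty node's presence is the whole hint.
    static void markStrided(llvm::LoadInst *LI) {
      llvm::LLVMContext &Ctx = LI->getContext();
      LI->setMetadata("falkor.strided.access", llvm::MDNode::get(Ctx, {}));
    }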
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 0be14673eb20..0dcf07f98412 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -37,6 +37,8 @@ def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
AssemblerPredicate<"FeatureSPE", "spe">;
+def HasSVE : Predicate<"Subtarget->hasSVE()">,
+ AssemblerPredicate<"FeatureSVE", "sve">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 4a0a7c36baf8..ffb27834c31c 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -82,7 +82,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({Op, 1, s1}, Legal);
}
- for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
+ for (unsigned BinOp : {G_FADD, G_FSUB, G_FMA, G_FMUL, G_FDIV})
for (auto Ty : {s32, s64})
setAction({BinOp, Ty}, Legal);
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index fab92e139dd0..9f7dcb3fe1c3 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -74,7 +74,7 @@ const uint32_t *
AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
if (CC == CallingConv::GHC)
- // This is academic becase all GHC calls are (supposed to be) tail calls
+ // This is academic because all GHC calls are (supposed to be) tail calls
return CSR_AArch64_NoRegs_RegMask;
if (CC == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_RegMask;
@@ -167,7 +167,7 @@ bool AArch64RegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
const TargetRegisterClass *
AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
- return &AArch64::GPR64RegClass;
+ return &AArch64::GPR64spRegClass;
}
const TargetRegisterClass *
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index a3238cf3b60f..ea6112452736 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -134,7 +134,9 @@ void AArch64Subtarget::initializeProperties() {
case CortexA72:
PrefFunctionAlignment = 4;
break;
- case CortexA73: break;
+ case CortexA73:
+ PrefFunctionAlignment = 4;
+ break;
case Others: break;
}
}
@@ -171,7 +173,8 @@ struct AArch64GISelActualAccessor : public GISelAccessor {
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
- : AArch64GenSubtargetInfo(TT, CPU, FS), ReserveX18(TT.isOSDarwin()),
+ : AArch64GenSubtargetInfo(TT, CPU, FS),
+ ReserveX18(TT.isOSDarwin() || TT.isOSWindows()),
IsLittle(LittleEndian), TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
TLInfo(TM, *this), GISel() {
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index db53946cbc77..5a1f45ee2552 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -70,6 +70,7 @@ protected:
bool HasFullFP16 = false;
bool HasSPE = false;
bool HasLSLFast = false;
+ bool HasSVE = false;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
@@ -251,6 +252,7 @@ public:
bool hasFullFP16() const { return HasFullFP16; }
bool hasSPE() const { return HasSPE; }
bool hasLSLFast() const { return HasLSLFast; }
+ bool hasSVE() const { return HasSVE; }
bool isLittleEndian() const { return IsLittle; }
@@ -304,6 +306,17 @@ public:
bool enableEarlyIfConversion() const override;
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
+
+ bool isCallingConvWin64(CallingConv::ID CC) const {
+ switch (CC) {
+ case CallingConv::C:
+ return isTargetWindows();
+ case CallingConv::Win64:
+ return true;
+ default:
+ return false;
+ }
+ }
};
} // End llvm namespace
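The new isCallingConvWin64 helper gives lowering code one place to ask whether Win64 varargs rules apply: an explicit Win64 calling convention always does, and the default C convention does only when the target itself is Windows. A standalone model of the predicate (names are illustrative, not LLVM API):

    #include <cassert>

    enum class CC { C, Win64, Fast };

    static bool isWin64CC(CC Conv, bool IsTargetWindows) {
      switch (Conv) {
      case CC::C:     return IsTargetWindows; // plain C is Win64 only on Windows
      case CC::Win64: return true;            // explicit Win64 applies everywhere
      default:        return false;
      }
    }

    int main() {
      assert(isWin64CC(CC::Win64, false) && isWin64CC(CC::C, true));
      assert(!isWin64CC(CC::C, false) && !isWin64CC(CC::Fast, true));
      return 0;
    }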
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 6237b8f3e7b9..ba28c01a2eff 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -138,6 +138,9 @@ static cl::opt<int> EnableGlobalISelAtO(
cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
cl::init(-1));
+static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
+ cl::init(true), cl::Hidden);
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -158,6 +161,8 @@ extern "C" void LLVMInitializeAArch64Target() {
initializeAArch64PromoteConstantPass(*PR);
initializeAArch64RedundantCopyEliminationPass(*PR);
initializeAArch64StorePairSuppressPass(*PR);
+ initializeFalkorHWPFFixPass(*PR);
+ initializeFalkorMarkStridedAccessesLegacyPass(*PR);
initializeLDTLSCleanupPass(*PR);
}
@@ -182,7 +187,7 @@ static std::string computeDataLayout(const Triple &TT,
if (TT.isOSBinFormatMachO())
return "e-m:o-i64:64-i128:128-n32:64-S128";
if (TT.isOSBinFormatCOFF())
- return "e-m:w-i64:64-i128:128-n32:64-S128";
+ return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128";
if (LittleEndian)
return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
@@ -346,8 +351,12 @@ void AArch64PassConfig::addIRPasses() {
//
// Run this before LSR to remove the multiplies involved in computing the
// pointer values N iterations ahead.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch)
- addPass(createLoopDataPrefetchPass());
+ if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (EnableLoopDataPrefetch)
+ addPass(createLoopDataPrefetchPass());
+ if (EnableFalkorHWPFFix)
+ addPass(createFalkorMarkStridedAccessesPass());
+ }
TargetPassConfig::addIRPasses();
@@ -478,8 +487,12 @@ void AArch64PassConfig::addPreSched2() {
// Expand some pseudo instructions to allow proper scheduling.
addPass(createAArch64ExpandPseudoPass());
// Use load/store pair instructions when possible.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt)
- addPass(createAArch64LoadStoreOptimizationPass());
+ if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (EnableLoadStoreOpt)
+ addPass(createAArch64LoadStoreOptimizationPass());
+ if (EnableFalkorHWPFFix)
+ addPass(createFalkorHWPFFixPass());
+ }
}
void AArch64PassConfig::addPreEmitPass() {
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index fefa7e26b79f..85de02e859e0 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -36,6 +36,8 @@ public:
~AArch64TargetMachine() override;
const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
+ // The no-argument getSubtargetImpl, while it exists on some targets, is
+ // deprecated and should not be used.
const AArch64Subtarget *getSubtargetImpl() const = delete;
// Pass Pipeline Configuration
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index e841fb894519..a79d51820545 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -86,7 +86,7 @@ private:
bool parseOperand(OperandVector &Operands, bool isCondCode,
bool invertCondCode);
- bool showMatchError(SMLoc Loc, unsigned ErrCode);
+ bool showMatchError(SMLoc Loc, unsigned ErrCode, OperandVector &Operands);
bool parseDirectiveArch(SMLoc L);
bool parseDirectiveCPU(SMLoc L);
@@ -3257,7 +3257,10 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst,
}
}
-bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
+std::string AArch64MnemonicSpellCheck(StringRef S, uint64_t FBS);
+
+bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
+ OperandVector &Operands) {
switch (ErrCode) {
case Match_MissingFeature:
return Error(Loc,
@@ -3380,8 +3383,12 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
return Error(Loc, "expected readable system register");
case Match_MSR:
return Error(Loc, "expected writable system register or pstate");
- case Match_MnemonicFail:
- return Error(Loc, "unrecognized instruction mnemonic");
+ case Match_MnemonicFail: {
+ std::string Suggestion = AArch64MnemonicSpellCheck(
+ ((AArch64Operand &)*Operands[0]).getToken(),
+ ComputeAvailableFeatures(STI->getFeatureBits()));
+ return Error(Loc, "unrecognized instruction mnemonic" + Suggestion);
+ }
default:
llvm_unreachable("unexpected error code!");
}
@@ -3707,7 +3714,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, Msg);
}
case Match_MnemonicFail:
- return showMatchError(IDLoc, MatchResult);
+ return showMatchError(IDLoc, MatchResult, Operands);
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
@@ -3726,7 +3733,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
((AArch64Operand &)*Operands[ErrorInfo]).isTokenSuffix())
MatchResult = Match_InvalidSuffix;
- return showMatchError(ErrorLoc, MatchResult);
+ return showMatchError(ErrorLoc, MatchResult, Operands);
}
case Match_InvalidMemoryIndexed1:
case Match_InvalidMemoryIndexed2:
@@ -3784,7 +3791,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SMLoc ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
- return showMatchError(ErrorLoc, MatchResult);
+ return showMatchError(ErrorLoc, MatchResult, Operands);
}
}
diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt
index 02b12b5e90ca..f7e0a5c7bed3 100644
--- a/lib/Target/AArch64/CMakeLists.txt
+++ b/lib/Target/AArch64/CMakeLists.txt
@@ -47,6 +47,7 @@ add_llvm_target(AArch64CodeGen
AArch64ConditionalCompares.cpp
AArch64DeadRegisterDefinitionsPass.cpp
AArch64ExpandPseudoInsts.cpp
+ AArch64FalkorHWPFFix.cpp
AArch64FastISel.cpp
AArch64A53Fix835769.cpp
AArch64FrameLowering.cpp
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index a7a7daf4b4a5..2bd0cbf9f7c6 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -104,8 +104,9 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case FK_Data_1:
return 1;
- case FK_Data_2:
case AArch64::fixup_aarch64_movw:
+ case FK_Data_2:
+ case FK_SecRel_2:
return 2;
case AArch64::fixup_aarch64_pcrel_branch14:
@@ -124,6 +125,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case AArch64::fixup_aarch64_pcrel_branch26:
case AArch64::fixup_aarch64_pcrel_call26:
case FK_Data_4:
+ case FK_SecRel_4:
return 4;
case FK_Data_8:
@@ -218,6 +220,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
+ case FK_SecRel_2:
+ case FK_SecRel_4:
return Value;
}
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
index 7862a03e771c..31762b9e4cd5 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
@@ -27,8 +27,7 @@ namespace {
class AArch64WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
public:
AArch64WinCOFFObjectWriter()
- : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARM64) {
- }
+ : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARM64) {}
~AArch64WinCOFFObjectWriter() override = default;
@@ -36,19 +35,59 @@ public:
const MCFixup &Fixup, bool IsCrossSection,
const MCAsmBackend &MAB) const override;
- bool recordRelocation(const MCFixup &) const override;
+ bool recordRelocation(const MCFixup &) const override;
};
} // end anonymous namespace
-unsigned
-AArch64WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
- const MCValue &Target,
- const MCFixup &Fixup,
- bool IsCrossSection,
- const MCAsmBackend &MAB) const {
- const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind());
- report_fatal_error(Twine("unsupported relocation type: ") + Info.Name);
+unsigned AArch64WinCOFFObjectWriter::getRelocType(
+ MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup,
+ bool IsCrossSection, const MCAsmBackend &MAB) const {
+ auto Modifier = Target.isAbsolute() ? MCSymbolRefExpr::VK_None
+ : Target.getSymA()->getKind();
+
+ switch (static_cast<unsigned>(Fixup.getKind())) {
+ default: {
+ const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind());
+ report_fatal_error(Twine("unsupported relocation type: ") + Info.Name);
+ }
+
+ case FK_Data_4:
+ switch (Modifier) {
+ default:
+ return COFF::IMAGE_REL_ARM64_ADDR32;
+ case MCSymbolRefExpr::VK_COFF_IMGREL32:
+ return COFF::IMAGE_REL_ARM64_ADDR32NB;
+ case MCSymbolRefExpr::VK_SECREL:
+ return COFF::IMAGE_REL_ARM64_SECREL;
+ }
+
+ case FK_Data_8:
+ return COFF::IMAGE_REL_ARM64_ADDR64;
+
+ case FK_SecRel_2:
+ return COFF::IMAGE_REL_ARM64_SECTION;
+
+ case FK_SecRel_4:
+ return COFF::IMAGE_REL_ARM64_SECREL;
+
+ case AArch64::fixup_aarch64_add_imm12:
+ return COFF::IMAGE_REL_ARM64_PAGEOFFSET_12A;
+
+ case AArch64::fixup_aarch64_ldst_imm12_scale1:
+ case AArch64::fixup_aarch64_ldst_imm12_scale2:
+ case AArch64::fixup_aarch64_ldst_imm12_scale4:
+ case AArch64::fixup_aarch64_ldst_imm12_scale8:
+ case AArch64::fixup_aarch64_ldst_imm12_scale16:
+ return COFF::IMAGE_REL_ARM64_PAGEOFFSET_12L;
+
+ case AArch64::fixup_aarch64_pcrel_adrp_imm21:
+ return COFF::IMAGE_REL_ARM64_PAGEBASE_REL21;
+
+ case AArch64::fixup_aarch64_pcrel_branch26:
+ case AArch64::fixup_aarch64_pcrel_call26:
+ return COFF::IMAGE_REL_ARM64_BRANCH26;
+ }
}
bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 5a799b2d88d0..568682899be5 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -56,7 +56,7 @@ extern char &AMDGPUMachineCFGStructurizerID;
void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
-ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
+Pass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 7235d8fae332..c68e5861ff25 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -15,8 +15,10 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -26,26 +28,27 @@ using namespace llvm;
namespace {
-class AMDGPUAnnotateKernelFeatures : public ModulePass {
+class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
+ const TargetMachine *TM = nullptr;
AMDGPUAS AS;
- static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);
- void addAttrToCallers(Function *Intrin, StringRef AttrName);
- bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
+ bool addFeatureAttributes(Function &F);
public:
static char ID;
- AMDGPUAnnotateKernelFeatures() : ModulePass(ID) {}
- bool runOnModule(Module &M) override;
+ AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
+
+ bool doInitialization(CallGraph &CG) override;
+ bool runOnSCC(CallGraphSCC &SCC) override;
StringRef getPassName() const override {
return "AMDGPU Annotate Kernel Features";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
- ModulePass::getAnalysisUsage(AU);
+ CallGraphSCCPass::getAnalysisUsage(AU);
}
static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
@@ -121,16 +124,130 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
return false;
}
-// Return true if an addrspacecast is used that requires the queue ptr.
-bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
- AMDGPUAS AS) {
+// We do not need to note the x workitem or workgroup id because they are always
+// initialized.
+//
+// TODO: We should not add the attributes if the known compile time workgroup
+// size is 1 for y/z.
+static StringRef intrinsicToAttrName(Intrinsic::ID ID,
+ bool &NonKernelOnly,
+ bool &IsQueuePtr) {
+ switch (ID) {
+ case Intrinsic::amdgcn_workitem_id_x:
+ NonKernelOnly = true;
+ return "amdgpu-work-item-id-x";
+ case Intrinsic::amdgcn_workgroup_id_x:
+ NonKernelOnly = true;
+ return "amdgpu-work-group-id-x";
+ case Intrinsic::amdgcn_workitem_id_y:
+ case Intrinsic::r600_read_tidig_y:
+ return "amdgpu-work-item-id-y";
+ case Intrinsic::amdgcn_workitem_id_z:
+ case Intrinsic::r600_read_tidig_z:
+ return "amdgpu-work-item-id-z";
+ case Intrinsic::amdgcn_workgroup_id_y:
+ case Intrinsic::r600_read_tgid_y:
+ return "amdgpu-work-group-id-y";
+ case Intrinsic::amdgcn_workgroup_id_z:
+ case Intrinsic::r600_read_tgid_z:
+ return "amdgpu-work-group-id-z";
+ case Intrinsic::amdgcn_dispatch_ptr:
+ return "amdgpu-dispatch-ptr";
+ case Intrinsic::amdgcn_dispatch_id:
+ return "amdgpu-dispatch-id";
+ case Intrinsic::amdgcn_kernarg_segment_ptr:
+ case Intrinsic::amdgcn_implicitarg_ptr:
+ return "amdgpu-kernarg-segment-ptr";
+ case Intrinsic::amdgcn_queue_ptr:
+ case Intrinsic::trap:
+ case Intrinsic::debugtrap:
+ IsQueuePtr = true;
+ return "amdgpu-queue-ptr";
+ default:
+ return "";
+ }
+}
+
+static bool handleAttr(Function &Parent, const Function &Callee,
+ StringRef Name) {
+ if (Callee.hasFnAttribute(Name)) {
+ Parent.addFnAttr(Name);
+ return true;
+ }
+
+ return false;
+}
+
+static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
+ bool &NeedQueuePtr) {
+ // X ids unnecessarily propagated to kernels.
+ static const StringRef AttrNames[] = {
+ { "amdgpu-work-item-id-x" },
+ { "amdgpu-work-item-id-y" },
+ { "amdgpu-work-item-id-z" },
+ { "amdgpu-work-group-id-x" },
+ { "amdgpu-work-group-id-y" },
+ { "amdgpu-work-group-id-z" },
+ { "amdgpu-dispatch-ptr" },
+ { "amdgpu-dispatch-id" },
+ { "amdgpu-kernarg-segment-ptr" }
+ };
+
+ if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
+ NeedQueuePtr = true;
+
+ for (StringRef AttrName : AttrNames)
+ handleAttr(Parent, Callee, AttrName);
+}
+
+bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+ bool HasFlat = ST.hasFlatAddressSpace();
+ bool HasApertureRegs = ST.hasApertureRegs();
SmallPtrSet<const Constant *, 8> ConstantExprVisited;
- for (const BasicBlock &BB : F) {
- for (const Instruction &I : BB) {
+ bool Changed = false;
+ bool NeedQueuePtr = false;
+ bool HaveCall = false;
+ bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
+
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ CallSite CS(&I);
+ if (CS) {
+ Function *Callee = CS.getCalledFunction();
+
+ // TODO: Do something with indirect calls.
+ if (!Callee) {
+ if (!CS.isInlineAsm())
+ HaveCall = true;
+ continue;
+ }
+
+ Intrinsic::ID IID = Callee->getIntrinsicID();
+ if (IID == Intrinsic::not_intrinsic) {
+ HaveCall = true;
+ copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
+ Changed = true;
+ } else {
+ bool NonKernelOnly = false;
+ StringRef AttrName = intrinsicToAttrName(IID,
+ NonKernelOnly, NeedQueuePtr);
+ if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
+ F.addFnAttr(AttrName);
+ Changed = true;
+ }
+ }
+ }
+
+ if (NeedQueuePtr || HasApertureRegs)
+ continue;
+
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
- if (castRequiresQueuePtr(ASC, AS))
- return true;
+ if (castRequiresQueuePtr(ASC, AS)) {
+ NeedQueuePtr = true;
+ continue;
+ }
}
for (const Use &U : I.operands()) {
@@ -138,100 +255,57 @@ bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
if (!OpC)
continue;
- if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS))
- return true;
+ if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
+ NeedQueuePtr = true;
+ break;
+ }
}
}
}
- return false;
-}
-
-void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
- StringRef AttrName) {
- SmallPtrSet<Function *, 4> SeenFuncs;
-
- for (User *U : Intrin->users()) {
- // CallInst is the only valid user for an intrinsic.
- CallInst *CI = cast<CallInst>(U);
-
- Function *CallingFunction = CI->getParent()->getParent();
- if (SeenFuncs.insert(CallingFunction).second)
- CallingFunction->addFnAttr(AttrName);
+ if (NeedQueuePtr) {
+ F.addFnAttr("amdgpu-queue-ptr");
+ Changed = true;
}
-}
-
-bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
- Module &M,
- ArrayRef<StringRef[2]> IntrinsicToAttr) {
- bool Changed = false;
- for (const StringRef *Arr : IntrinsicToAttr) {
- if (Function *Fn = M.getFunction(Arr[0])) {
- addAttrToCallers(Fn, Arr[1]);
- Changed = true;
- }
+ // TODO: We could refine this to captured pointers that could possibly be
+ // accessed by flat instructions. For now this is mostly a poor way of
+ // estimating whether there are calls before argument lowering.
+ if (HasFlat && !IsFunc && HaveCall) {
+ F.addFnAttr("amdgpu-flat-scratch");
+ Changed = true;
}
return Changed;
}
-bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
+bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
+ Module &M = SCC.getCallGraph().getModule();
Triple TT(M.getTargetTriple());
- AS = AMDGPU::getAMDGPUAS(M);
-
- static const StringRef IntrinsicToAttr[][2] = {
- // .x omitted
- { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
- { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },
-
- { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
- { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },
-
- { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
- { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },
-
- // .x omitted
- { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
- { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
- };
- static const StringRef HSAIntrinsicToAttr[][2] = {
- { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
- { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
- { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
- { "llvm.trap", "amdgpu-queue-ptr" },
- { "llvm.debugtrap", "amdgpu-queue-ptr" }
- };
-
- // TODO: We should not add the attributes if the known compile time workgroup
- // size is 1 for y/z.
-
- // TODO: Intrinsics that require queue ptr.
+ bool Changed = false;
+ for (CallGraphNode *I : SCC) {
+ Function *F = I->getFunction();
+ if (!F || F->isDeclaration())
+ continue;
- // We do not need to note the x workitem or workgroup id because they are
- // always initialized.
+ Changed |= addFeatureAttributes(*F);
+ }
- bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
- if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) {
- Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);
- for (Function &F : M) {
- if (F.hasFnAttribute("amdgpu-queue-ptr"))
- continue;
+ return Changed;
+}
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- bool HasApertureRegs = TPC && TPC->getTM<TargetMachine>()
- .getSubtarget<AMDGPUSubtarget>(F)
- .hasApertureRegs();
- if (!HasApertureRegs && hasAddrSpaceCast(F, AS))
- F.addFnAttr("amdgpu-queue-ptr");
- }
- }
+bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ report_fatal_error("TargetMachine is required");
- return Changed;
+ AS = AMDGPU::getAMDGPUAS(CG.getModule());
+ TM = &TPC->getTM<TargetMachine>();
+ return false;
}
-ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
+Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
return new AMDGPUAnnotateKernelFeatures();
}
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 83ad1a5c6ee3..2247814cfe55 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -268,20 +268,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
CurrentProgramInfo.ScratchSize,
getFunctionCodeSize(MF));
- OutStreamer->emitRawComment(" codeLenInByte = " +
- Twine(getFunctionCodeSize(MF)), false);
- OutStreamer->emitRawComment(
- " NumSgprs: " + Twine(CurrentProgramInfo.NumSGPR), false);
- OutStreamer->emitRawComment(
- " NumVgprs: " + Twine(CurrentProgramInfo.NumVGPR), false);
-
OutStreamer->emitRawComment(
" FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
OutStreamer->emitRawComment(
" IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
OutStreamer->emitRawComment(
- " ScratchSize: " + Twine(CurrentProgramInfo.ScratchSize), false);
- OutStreamer->emitRawComment(
" LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
" bytes/workgroup (compile time only)", false);
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 2553cf4da0fe..258b1737deb3 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -573,6 +573,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FNEG);
setTargetDAGCombine(ISD::FABS);
+ setTargetDAGCombine(ISD::AssertZext);
+ setTargetDAGCombine(ISD::AssertSext);
}
//===----------------------------------------------------------------------===//
@@ -883,7 +885,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
/// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
/// input values across multiple registers. Each item in the Ins array
-/// represents a single value that will be stored in regsters. Ins[x].VT is
+/// represents a single value that will be stored in registers. Ins[x].VT is
/// the value type of the value that will be stored in the register, so
/// whatever SDNode we lower the argument to needs to be this type.
///
@@ -2591,6 +2593,31 @@ SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N,
return SDValue(CSrc, 0);
}
+// FIXME: This should go in generic DAG combiner with an isTruncateFree check,
+// but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU
+// issues.
+SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+
+ // (vt2 (assertzext (truncate vt0:x), vt1)) ->
+ // (vt2 (truncate (assertzext vt0:x, vt1)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue N1 = N->getOperand(1);
+ EVT ExtVT = cast<VTSDNode>(N1)->getVT();
+ SDLoc SL(N);
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.bitsGE(ExtVT)) {
+ SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1);
+ return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg);
+ }
+ }
+
+ return SDValue();
+}
/// Split the 64-bit value \p LHS into two 32-bit components, and perform the
/// binary operation \p Opc to it with the corresponding constant operands.
SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
@@ -3521,6 +3548,9 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
+ case ISD::AssertZext:
+ case ISD::AssertSext:
+ return performAssertSZExtCombine(N, DCI);
}
return SDValue();
}
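performAssertSZExtCombine rewrites (assert[sz]ext (truncate x)) into (truncate (assert[sz]ext x)) so the assertion sits on the wider source value, where the truncate can later fold away; the rewrite is only done when the source type is at least as wide as the asserted type. A minimal standalone sketch of why the reordering preserves the value, modelling AssertZext as a checked no-op (an assumption of this sketch, not DAG code):

    #include <cassert>
    #include <cstdint>

    // AssertZext as a checked no-op: the value must already be zero-extended
    // from Bits; the node only records that fact.
    static uint32_t assertZExt(uint32_t V, unsigned Bits) {
      assert((V >> Bits) == 0 && "value was not zero-extended as claimed");
      return V;
    }

    int main() {
      uint32_t Src = 0xAB; // i32 value, zero-extended from i8
      uint16_t Before = static_cast<uint16_t>(assertZExt(static_cast<uint16_t>(Src), 8)); // assert after trunc
      uint16_t After = static_cast<uint16_t>(assertZExt(Src, 8));                         // trunc after assert
      assert(Before == After); // reordering is sound whenever SrcVT >= ExtVT
      return 0;
    }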
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index a45234e2b39f..d85aada6053a 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -76,6 +76,7 @@ protected:
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
unsigned Opc, SDValue LHS,
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 1bc5a52053ec..779617629010 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -277,7 +277,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
// Make sure requested values are compatible with values implied by requested
// minimum/maximum flat work group sizes.
if (RequestedFlatWorkGroupSize &&
- Requested.first > MinImpliedByFlatWorkGroupSize)
+ Requested.first < MinImpliedByFlatWorkGroupSize)
return Default;
return Requested;
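The one-character fix above inverts a feasibility check in getWavesPerEU: the requested range is kept only when its lower bound is at least the minimum number of waves per EU implied by the requested flat work group size. A standalone sketch of the corrected comparison, with made-up numbers:

    #include <cassert>
    #include <utility>

    int main() {
      std::pair<unsigned, unsigned> Default{4, 10}, Requested{2, 8};
      unsigned MinImpliedByFlatWorkGroupSize = 3;
      // Requested.first (2) is below the implied minimum (3), so the request is
      // incompatible and the default range is kept; the old '>' test would have
      // accepted it and instead rejected perfectly compatible requests.
      auto Result = Requested.first < MinImpliedByFlatWorkGroupSize ? Default
                                                                    : Requested;
      assert(Result == Default);
      return 0;
    }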
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 22cede59086a..d4b6a5fe8020 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -359,6 +359,10 @@ public:
return FP64FP16Denormals;
}
+ bool supportsMinMaxDenormModes() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
bool hasFPExceptions() const {
return FPExceptions;
}
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index e3c90f250600..b37c274102bc 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1208,7 +1208,7 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
}
bool AMDGPUOperand::isLiteralImm(MVT type) const {
- // Check that this imediate can be added as literal
+ // Check that this immediate can be added as literal
if (!isImmTy(ImmTyNone)) {
return false;
}
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index f26e49295e69..966c6fec20c6 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -87,6 +87,7 @@ DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)
DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
+DECODE_OPERAND_REG(VS_128)
DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
@@ -318,6 +319,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
return decodeSrcOp(OPW64, Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
+ return decodeSrcOp(OPW128, Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
return decodeSrcOp(OPW16, Val);
}
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 3d71db909e20..4c755be09999 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -70,6 +70,7 @@ public:
MCOperand decodeOperand_VGPR_32(unsigned Val) const;
MCOperand decodeOperand_VS_32(unsigned Val) const;
MCOperand decodeOperand_VS_64(unsigned Val) const;
+ MCOperand decodeOperand_VS_128(unsigned Val) const;
MCOperand decodeOperand_VSrc16(unsigned Val) const;
MCOperand decodeOperand_VSrcV216(unsigned Val) const;
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index 3af242d9ea66..0aad8f0843d6 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -653,6 +653,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
// again. The same constant folded instruction could also have a second
// use operand.
NextUse = MRI->use_begin(Dst.getReg());
+ FoldList.clear();
continue;
}
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index 08a64de38501..7334781916d8 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -158,7 +158,7 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
// No replacement necessary.
if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
!MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
- assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister);
+ assert(MFI->getStackPtrOffsetReg() == AMDGPU::SP_REG);
return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
}
@@ -246,13 +246,16 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
// this point it appears we need the setup. This part of the prolog should be
// emitted after frame indices are eliminated.
- if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
+ if (MFI->hasFlatScratchInit())
emitFlatScratchInit(ST, MF, MBB);
unsigned SPReg = MFI->getStackPtrOffsetReg();
- if (SPReg != AMDGPU::NoRegister) {
+ if (SPReg != AMDGPU::SP_REG) {
+ assert(MRI.isReserved(SPReg) && "SPReg used but not reserved");
+
DebugLoc DL;
- int64_t StackSize = MF.getFrameInfo().getStackSize();
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ int64_t StackSize = FrameInfo.getStackSize();
if (StackSize == 0) {
BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 2ba570b9ebbb..2356405f0919 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1171,8 +1171,7 @@ static void allocateSystemSGPRs(CCState &CCInfo,
static void reservePrivateMemoryRegs(const TargetMachine &TM,
MachineFunction &MF,
const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info,
- bool NeedSP) {
+ SIMachineFunctionInfo &Info) {
// Now that we've figured out where the scratch register inputs are, see if
// should reserve the arguments and use them directly.
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1234,15 +1233,6 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
Info.setScratchWaveOffsetReg(ReservedOffsetReg);
}
}
-
- if (NeedSP) {
- unsigned ReservedStackPtrOffsetReg = TRI.reservedStackPtrOffsetReg(MF);
- Info.setStackPtrOffsetReg(ReservedStackPtrOffsetReg);
-
- assert(Info.getStackPtrOffsetReg() != Info.getFrameOffsetReg());
- assert(!TRI.isSubRegister(Info.getScratchRSrcReg(),
- Info.getStackPtrOffsetReg()));
- }
}
SDValue SITargetLowering::LowerFormalArguments(
@@ -1380,10 +1370,37 @@ SDValue SITargetLowering::LowerFormalArguments(
unsigned Reg = VA.getLocReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
+ EVT ValVT = VA.getValVT();
Reg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ // If this is an 8 or 16-bit value, it is really passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
+ break;
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, DL, VT, Val,
+ DAG.getValueType(ValVT));
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+ break;
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, DL, VT, Val,
+ DAG.getValueType(ValVT));
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+ break;
+ case CCValAssign::AExt:
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+ break;
+ default:
+ llvm_unreachable("Unknown loc info!");
+ }
+
if (IsShader && Arg.VT.isVector()) {
// Build a vector from the registers
Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
@@ -1410,25 +1427,13 @@ SDValue SITargetLowering::LowerFormalArguments(
InVals.push_back(Val);
}
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
-
- // TODO: Could maybe omit SP if only tail calls?
- bool NeedSP = FrameInfo.hasCalls() || FrameInfo.hasVarSizedObjects();
-
// Start adding system SGPRs.
if (IsEntryFunc) {
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader);
- reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info, NeedSP);
} else {
CCInfo.AllocateReg(Info->getScratchRSrcReg());
CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
CCInfo.AllocateReg(Info->getFrameOffsetReg());
-
- if (NeedSP) {
- unsigned StackPtrReg = findFirstFreeSGPR(CCInfo);
- CCInfo.AllocateReg(StackPtrReg);
- Info->setStackPtrOffsetReg(StackPtrReg);
- }
}
return Chains.empty() ? Chain :
@@ -4624,8 +4629,8 @@ static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
return DAG.isKnownNeverNaN(Op);
}
-static bool isCanonicalized(SDValue Op, const SISubtarget *ST,
- unsigned MaxDepth=5) {
+static bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
+ const SISubtarget *ST, unsigned MaxDepth=5) {
// If source is a result of another standard FP operation it is already in
// canonical form.
@@ -4663,7 +4668,7 @@ static bool isCanonicalized(SDValue Op, const SISubtarget *ST,
case ISD::FNEG:
case ISD::FABS:
return (MaxDepth > 0) &&
- isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1);
+ isCanonicalized(DAG, Op.getOperand(0), ST, MaxDepth - 1);
case ISD::FSIN:
case ISD::FCOS:
@@ -4672,16 +4677,19 @@ static bool isCanonicalized(SDValue Op, const SISubtarget *ST,
// In pre-GFX9 targets V_MIN_F32 and others do not flush denorms.
// For such targets need to check their input recursively.
- // TODO: on GFX9+ we could return true without checking provided no-nan
- // mode, since canonicalization is also used to quiet sNaNs.
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINNAN:
case ISD::FMAXNAN:
+ if (ST->supportsMinMaxDenormModes() &&
+ DAG.isKnownNeverNaN(Op.getOperand(0)) &&
+ DAG.isKnownNeverNaN(Op.getOperand(1)))
+ return true;
+
return (MaxDepth > 0) &&
- isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1) &&
- isCanonicalized(Op.getOperand(1), ST, MaxDepth - 1);
+ isCanonicalized(DAG, Op.getOperand(0), ST, MaxDepth - 1) &&
+ isCanonicalized(DAG, Op.getOperand(1), ST, MaxDepth - 1);
case ISD::ConstantFP: {
auto F = cast<ConstantFPSDNode>(Op)->getValueAPF();
@@ -4700,11 +4708,19 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
if (!CFP) {
SDValue N0 = N->getOperand(0);
+ EVT VT = N0.getValueType().getScalarType();
+ auto ST = getSubtarget();
+
+ if (((VT == MVT::f32 && ST->hasFP32Denormals()) ||
+ (VT == MVT::f64 && ST->hasFP64Denormals()) ||
+ (VT == MVT::f16 && ST->hasFP16Denormals())) &&
+ DAG.isKnownNeverNaN(N0))
+ return N0;
bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
if ((IsIEEEMode || isKnownNeverSNan(DAG, N0)) &&
- isCanonicalized(N0, getSubtarget()))
+ isCanonicalized(DAG, N0, ST))
return N0;
return SDValue();
@@ -5813,3 +5829,44 @@ SITargetLowering::getConstraintType(StringRef Constraint) const {
}
return TargetLowering::getConstraintType(Constraint);
}
+
+// Figure out which registers should be reserved for stack access. Only after
+// the function is legalized do we know all of the non-spill stack objects or if
+// calls are present.
+void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ if (Info->isEntryFunction()) {
+ // Callable functions have fixed registers used for stack access.
+ reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
+ }
+
+ // We have to assume the SP is needed in case there are calls in the function
+ // during lowering. Calls are only detected after the function is
+ // lowered. We're about to reserve registers, so don't bother reserving the
+ // SP if we aren't really going to use it.
+ bool NeedSP = !Info->isEntryFunction() ||
+ MFI.hasVarSizedObjects() ||
+ MFI.hasCalls();
+
+ if (NeedSP) {
+ unsigned ReservedStackPtrOffsetReg = TRI->reservedStackPtrOffsetReg(MF);
+ Info->setStackPtrOffsetReg(ReservedStackPtrOffsetReg);
+
+ assert(Info->getStackPtrOffsetReg() != Info->getFrameOffsetReg());
+ assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
+ Info->getStackPtrOffsetReg()));
+ MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
+ }
+
+ MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg());
+ MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg());
+ MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG,
+ Info->getScratchWaveOffsetReg());
+
+ TargetLoweringBase::finalizeLowering(MF);
+}
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index 83392a7ab1b2..e6bb3d6cd419 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -232,6 +232,8 @@ public:
ConstraintType getConstraintType(StringRef Constraint) const override;
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL,
SDValue V) const;
+
+ void finalizeLowering(MachineFunction &MF) const override;
};
} // End namespace llvm
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 160f8837d49c..a7e0feb10b9f 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3408,8 +3408,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
}
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
- SmallVector<MachineInstr *, 128> Worklist;
- Worklist.push_back(&TopInst);
+ SetVectorType Worklist;
+ Worklist.insert(&TopInst);
while (!Worklist.empty()) {
MachineInstr &Inst = *Worklist.pop_back_val();
@@ -3610,7 +3610,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
}
-void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
+void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3635,7 +3635,7 @@ void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
}
void SIInstrInfo::splitScalar64BitUnaryOp(
- SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst,
+ SetVectorType &Worklist, MachineInstr &Inst,
unsigned Opcode) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3686,7 +3686,7 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
}
void SIInstrInfo::splitScalar64BitBinaryOp(
- SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst,
+ SetVectorType &Worklist, MachineInstr &Inst,
unsigned Opcode) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3753,7 +3753,7 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
}
void SIInstrInfo::splitScalar64BitBCNT(
- SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst) const {
+ SetVectorType &Worklist, MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3789,7 +3789,7 @@ void SIInstrInfo::splitScalar64BitBCNT(
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3853,12 +3853,12 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
void SIInstrInfo::addUsersToMoveToVALUWorklist(
unsigned DstReg,
MachineRegisterInfo &MRI,
- SmallVectorImpl<MachineInstr *> &Worklist) const {
+ SetVectorType &Worklist) const {
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
E = MRI.use_end(); I != E;) {
MachineInstr &UseMI = *I->getParent();
if (!canReadVGPR(UseMI, I.getOperandNo())) {
- Worklist.push_back(&UseMI);
+ Worklist.insert(&UseMI);
do {
++I;
@@ -3869,7 +3869,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
}
}
-void SIInstrInfo::movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
+void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
MachineRegisterInfo &MRI,
MachineInstr &Inst) const {
unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -3932,7 +3932,7 @@ void SIInstrInfo::movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
}
void SIInstrInfo::addSCCDefUsersToVALUWorklist(
- MachineInstr &SCCDefInst, SmallVectorImpl<MachineInstr *> &Worklist) const {
+ MachineInstr &SCCDefInst, SetVectorType &Worklist) const {
// This assumes that all the users of SCC are in the same block
// as the SCC def.
for (MachineInstr &MI :
@@ -3943,7 +3943,7 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(
return;
if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
- Worklist.push_back(&MI);
+ Worklist.insert(&MI);
}
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index d00c0d4a7f4e..3dd5bc89e6c7 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -19,6 +19,7 @@
#include "AMDGPUInstrInfo.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
+#include "llvm/ADT/SetVector.h"
namespace llvm {
@@ -38,6 +39,8 @@ private:
EXECZ = 3
};
+ typedef SmallSetVector<MachineInstr *, 32> SetVectorType;
+
static unsigned getBranchOpcode(BranchPredicate Cond);
static BranchPredicate getBranchPredicate(unsigned Opcode);
@@ -56,30 +59,30 @@ private:
void swapOperands(MachineInstr &Inst) const;
- void lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
+ void lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const;
- void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+ void splitScalar64BitUnaryOp(SetVectorType &Worklist,
MachineInstr &Inst, unsigned Opcode) const;
- void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+ void splitScalar64BitBinaryOp(SetVectorType &Worklist,
MachineInstr &Inst, unsigned Opcode) const;
- void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
+ void splitScalar64BitBCNT(SetVectorType &Worklist,
MachineInstr &Inst) const;
- void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+ void splitScalar64BitBFE(SetVectorType &Worklist,
MachineInstr &Inst) const;
- void movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
+ void movePackToVALU(SetVectorType &Worklist,
MachineRegisterInfo &MRI,
MachineInstr &Inst) const;
void addUsersToMoveToVALUWorklist(
unsigned Reg, MachineRegisterInfo &MRI,
- SmallVectorImpl<MachineInstr *> &Worklist) const;
+ SetVectorType &Worklist) const;
void
addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst,
- SmallVectorImpl<MachineInstr *> &Worklist) const;
+ SetVectorType &Worklist) const;
const TargetRegisterClass *
getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index ffb01363e131..088173680fa8 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1436,7 +1436,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit IsPacked = isPackedType<Src0VT>.ret;
field bit HasOpSel = IsPacked;
- field bit HasOMod = !if(HasOpSel, 0, HasModifiers);
+ field bit HasOMod = !if(HasOpSel, 0, isFloatType<DstVT>.ret);
field bit HasSDWAOMod = isFloatType<DstVT>.ret;
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index bcc685015cf5..ba69e42d9125 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1060,7 +1060,7 @@ def : Pat <
class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, SDPatternOperator fp_to_int> : Pat <
(i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))),
- (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
+ (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE))
>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 3203c38dae34..a7c8166ff6d2 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -23,10 +23,10 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),
- ScratchRSrcReg(AMDGPU::NoRegister),
- ScratchWaveOffsetReg(AMDGPU::NoRegister),
- FrameOffsetReg(AMDGPU::NoRegister),
- StackPtrOffsetReg(AMDGPU::NoRegister),
+ ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG),
+ ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG),
+ FrameOffsetReg(AMDGPU::FP_REG),
+ StackPtrOffsetReg(AMDGPU::SP_REG),
PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
DispatchPtrUserSGPR(AMDGPU::NoRegister),
QueuePtrUserSGPR(AMDGPU::NoRegister),
@@ -42,6 +42,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
+ WorkItemIDXVGPR(AMDGPU::NoRegister),
+ WorkItemIDYVGPR(AMDGPU::NoRegister),
+ WorkItemIDZVGPR(AMDGPU::NoRegister),
PSInputAddr(0),
PSInputEnable(0),
ReturnsVoid(true),
@@ -87,12 +90,14 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ScratchWaveOffsetReg = AMDGPU::SGPR4;
FrameOffsetReg = AMDGPU::SGPR5;
StackPtrOffsetReg = AMDGPU::SGPR32;
- return;
+
+ // FIXME: Not really a system SGPR.
+ PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
}
CallingConv::ID CC = F->getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
- KernargSegmentPtr = true;
+ KernargSegmentPtr = !F->arg_empty();
WorkGroupIDX = true;
WorkItemIDX = true;
} else if (CC == CallingConv::AMDGPU_PS) {
@@ -101,17 +106,25 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (ST.debuggerEmitPrologue()) {
// Enable everything.
+ WorkGroupIDX = true;
WorkGroupIDY = true;
WorkGroupIDZ = true;
+ WorkItemIDX = true;
WorkItemIDY = true;
WorkItemIDZ = true;
} else {
+ if (F->hasFnAttribute("amdgpu-work-group-id-x"))
+ WorkGroupIDX = true;
+
if (F->hasFnAttribute("amdgpu-work-group-id-y"))
WorkGroupIDY = true;
if (F->hasFnAttribute("amdgpu-work-group-id-z"))
WorkGroupIDZ = true;
+ if (F->hasFnAttribute("amdgpu-work-item-id-x"))
+ WorkItemIDX = true;
+
if (F->hasFnAttribute("amdgpu-work-item-id-y"))
WorkItemIDY = true;
@@ -119,25 +132,28 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkItemIDZ = true;
}
- // X, XY, and XYZ are the only supported combinations, so make sure Y is
- // enabled if Z is.
- if (WorkItemIDZ)
- WorkItemIDY = true;
-
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
bool MaySpill = ST.isVGPRSpillingEnabled(*F);
- bool HasStackObjects = FrameInfo.hasStackObjects() || FrameInfo.hasCalls();
+ bool HasStackObjects = FrameInfo.hasStackObjects();
+
+ if (isEntryFunction()) {
+ // X, XY, and XYZ are the only supported combinations, so make sure Y is
+ // enabled if Z is.
+ if (WorkItemIDZ)
+ WorkItemIDY = true;
- if (HasStackObjects || MaySpill) {
- PrivateSegmentWaveByteOffset = true;
+ if (HasStackObjects || MaySpill) {
+ PrivateSegmentWaveByteOffset = true;
- // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
- if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
- (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
- PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
+ // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
+ (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
+ PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
+ }
}
- if (ST.isAmdCodeObjectV2(MF)) {
+ bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
+ if (IsCOV2) {
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;
@@ -154,11 +170,15 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ImplicitBufferPtr = true;
}
- // We don't need to worry about accessing spills with flat instructions.
- // TODO: On VI where we must use flat for global, we should be able to omit
- // this if it is never used for generic access.
- if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
- FlatScratchInit = true;
+ if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
+ KernargSegmentPtr = true;
+
+ if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
+ // TODO: This could be refined a lot. The attribute is a poor way of
+ // detecting calls that may require it before argument lowering.
+ if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
+ FlatScratchInit = true;
+ }
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 05aa249584bf..4c7f38a09a48 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -119,6 +119,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
unsigned WorkGroupInfoSystemSGPR;
unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
+ // VGPR inputs. These are always v0, v1 and v2 for entry functions.
+ unsigned WorkItemIDXVGPR;
+ unsigned WorkItemIDYVGPR;
+ unsigned WorkItemIDZVGPR;
+
// Graphics info.
unsigned PSInputAddr;
unsigned PSInputEnable;
@@ -377,10 +382,13 @@ public:
}
void setStackPtrOffsetReg(unsigned Reg) {
- assert(Reg != AMDGPU::NoRegister && "Should never be unset");
StackPtrOffsetReg = Reg;
}
+ // Note the unset value for this is AMDGPU::SP_REG rather than
+ // NoRegister. This is mostly a workaround for MIR tests where state that
+ // can't be directly computed from the function is not preserved in serialized
+ // MIR.
unsigned getStackPtrOffsetReg() const {
return StackPtrOffsetReg;
}
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index ef6ad4ad0c8f..4a3fbb4593bb 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -207,7 +207,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
}
+ // We have to assume the SP is needed in case there are calls in the function,
+ // which is detected after the function is lowered. If we aren't really going
+ // to need SP, don't bother reserving it.
unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
+
if (StackPtrReg != AMDGPU::NoRegister) {
reserveRegisterTuples(Reserved, StackPtrReg);
assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index fc808011cd88..54ea7805e18d 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -23,6 +23,13 @@ class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;
+// Pseudo-registers: Used as placeholders during isel and immediately
+// replaced, never seeing the verifier.
+def PRIVATE_RSRC_REG : SIReg<"", 0>;
+def FP_REG : SIReg<"", 0>;
+def SP_REG : SIReg<"", 0>;
+def SCRATCH_WAVE_OFFSET_REG : SIReg<"", 0>;
+
// VCC for 64-bit instructions
def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
DwarfRegAlias<VCC_LO> {
@@ -267,7 +274,8 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
- SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
+ SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT,
+ FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> {
let AllocationPriority = 7;
}
@@ -314,7 +322,8 @@ def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128R
let isAllocatable = 0;
}
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)> {
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32,
+ (add SGPR_128, TTMP_128)> {
let AllocationPriority = 10;
}
@@ -464,7 +473,9 @@ defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ;
defm VSrc : RegImmOperand<"VS", "VSrc">;
-def VSrc_128 : RegisterOperand<VReg_128>;
+def VSrc_128 : RegisterOperand<VReg_128> {
+ let DecoderMethod = "DecodeVS_128RegisterClass";
+}
//===----------------------------------------------------------------------===//
// VSrc_* Operands with an VGPR
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 26515b27bb77..67ad904ca972 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -539,23 +539,9 @@ bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
}
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
-
- if (Reg0 == Reg1) {
- return true;
+ for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
+ if (*R == Reg1) return true;
}
-
- unsigned SubReg0 = TRI->getSubReg(Reg0, 1);
- if (SubReg0 == 0) {
- return TRI->getSubRegIndex(Reg1, Reg0) > 0;
- }
-
- for (unsigned Idx = 2; SubReg0 > 0; ++Idx) {
- if (isRegIntersect(Reg1, SubReg0, TRI)) {
- return true;
- }
- SubReg0 = TRI->getSubReg(Reg0, Idx);
- }
-
return false;
}
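For reference, a minimal standalone sketch of the simplified intersection test above. It assumes the MCRegAliasIterator(Reg, TRI, IncludeSelf) iterator from llvm/MC/MCRegisterInfo.h; the free-function name regsOverlap is illustrative only and not part of the patch.

#include "llvm/MC/MCRegisterInfo.h"
using namespace llvm;

// Two registers intersect exactly when Reg1 shows up among Reg0's aliases.
// With IncludeSelf=true the iterator yields Reg0 itself plus every register
// sharing at least one register unit with it (sub- and super-registers).
static bool regsOverlap(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, /*IncludeSelf=*/true); R.isValid(); ++R)
    if (*R == Reg1)
      return true;
  return false;
}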
diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td
index 7b9bc71ad4c7..d5acb49b4f39 100644
--- a/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/lib/Target/AMDGPU/VOP2Instructions.td
@@ -117,7 +117,10 @@ class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (node (P.Src0VT
+ !if(P.HasOMod,
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
[(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}
@@ -813,9 +816,11 @@ let SubtargetPredicate = isVI in {
// Aliases to simplify matching of floating-point instructions that
// are VOP2 on SI and VOP3 on VI.
-class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
+class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias <
name#" $dst, $src0, $src1",
- (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0)
+ !if(inst.Pfl.HasOMod,
+ (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0),
+ (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0))
>, PredicateControl {
let UseInstAsmMatchConverter = 0;
let AsmVariantName = AMDGPUAsmVariants.VOP3;
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index a8ca593f14ed..92ed0706dc01 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -12,17 +12,21 @@
//===----------------------------------------------------------------------===//
class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
+ dag src0 = !if(P.HasOMod,
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
+
list<dag> ret3 = [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (node (P.Src0VT src0),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (node (P.Src0VT src0),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod))))];
+ (node (P.Src0VT src0)))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -92,6 +96,7 @@ class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> {
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
// v_div_scale_{f32|f64} do not support input modifiers.
let HasModifiers = 0;
+ let HasOMod = 0;
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
}
diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td
index f2de1f995726..3becf758aaa3 100644
--- a/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -34,6 +34,9 @@ class VOP3_VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_
let isCommutable = 1 in {
def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>;
+def V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
+def V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
+
def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fadd>;
def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmul>;
def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum>;
@@ -41,7 +44,6 @@ def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>
def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, add>;
def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
-def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, sub>;
def V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, mul>;
def V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, smin>;
@@ -50,6 +52,9 @@ def V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>
def V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, umax>;
}
+def V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, sub>;
+
def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshl_rev>;
def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
@@ -71,6 +76,7 @@ multiclass VOP3P_Real_vi<bits<10> op> {
}
}
+defm V_PK_MAD_I16 : VOP3P_Real_vi <0x380>;
defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>;
defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>;
defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>;
@@ -79,8 +85,10 @@ defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>;
defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>;
defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>;
+defm V_PK_MAD_U16 : VOP3P_Real_vi <0x389>;
defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>;
+defm V_PK_SUB_U16 : VOP3P_Real_vi <0x38b>;
defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>;
defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>;
defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>;
diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td
index f3482a22d5dc..b636fc9be431 100644
--- a/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/lib/Target/AMDGPU/VOPCInstructions.td
@@ -148,6 +148,19 @@ class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
let SubtargetPredicate = AssemblerPredicate;
}
+class getVOPCPat64 <PatLeaf cond, VOPProfile P> : LetDummies {
+ list<dag> ret = !if(P.HasModifiers,
+ [(set i1:$sdst,
+ (setcc (P.Src0VT
+ !if(P.HasOMod,
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ cond))],
+ [(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]);
+}
+
+
multiclass VOPC_Pseudos <string opName,
VOPC_Profile P,
PatLeaf cond = COND_NULL,
@@ -163,14 +176,7 @@ multiclass VOPC_Pseudos <string opName,
let isCommutable = 1;
}
- def _e64 : VOP3_Pseudo<opName, P,
- !if(P.HasModifiers,
- [(set i1:$sdst,
- (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- cond))],
- [(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))])>,
+ def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = P.Schedule;
@@ -634,7 +640,7 @@ class FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : Pat <
(i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
(inst $src0_modifiers, $src0, $src1_modifiers, $src1,
- DSTCLAMP.NONE, DSTOMOD.NONE)
+ DSTCLAMP.NONE)
>;
def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td
index 77b7952b22a8..b47538ba0349 100644
--- a/lib/Target/AMDGPU/VOPInstructions.td
+++ b/lib/Target/AMDGPU/VOPInstructions.td
@@ -136,6 +136,8 @@ class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
let TSFlags = ps.TSFlags;
let UseNamedOperandTable = ps.UseNamedOperandTable;
let Uses = ps.Uses;
+
+ VOPProfile Pfl = ps.Pfl;
}
// XXX - Is there any reason to distinguish this from regular VOP3
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index c40b4450a5b5..e49c1babac21 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -17,144 +17,172 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
-// ARM Helper classes.
+// ARM Subtarget state.
//
-class ProcNoItin<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode",
+ "true", "Thumb mode">;
+
+def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat",
+ "true", "Use software floating "
+ "point features.">;
-class Architecture<string fname, string aname, list<SubtargetFeature> features >
- : SubtargetFeature<fname, "ARMArch", aname,
- !strconcat(aname, " architecture"), features>;
//===----------------------------------------------------------------------===//
-// ARM Subtarget state.
+// ARM Subtarget features.
//
-def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
- "Thumb mode">;
+// Floating Point, HW Division and Neon Support
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
+ "Enable VFP2 instructions">;
-def ModeSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
- "Use software floating point features.">;
+def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
+ "Enable VFP3 instructions",
+ [FeatureVFP2]>;
-//===----------------------------------------------------------------------===//
-// ARM Subtarget features.
-//
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable NEON instructions",
+ [FeatureVFP3]>;
+
+def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
+ "Enable half-precision "
+ "floating point">;
+
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+ "Enable VFP4 instructions",
+ [FeatureVFP3, FeatureFP16]>;
+
+def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
+ "true", "Enable ARMv8 FP",
+ [FeatureVFP4]>;
+
+def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
+ "Enable full half-precision "
+ "floating point",
+ [FeatureFPARMv8]>;
+
+def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
+ "Floating point unit supports "
+ "single precision only">;
+
+def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
+ "Restrict FP to 16 double registers">;
+
+def FeatureHWDivThumb : SubtargetFeature<"hwdiv",
+ "HasHardwareDivideInThumb", "true",
+ "Enable divide instructions in Thumb">;
+
+def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
+ "HasHardwareDivideInARM", "true",
+ "Enable divide instructions in ARM mode">;
+
+// Atomic Support
+def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
+ "Has data barrier (dmb/dsb) instructions">;
+
+def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true",
+ "Has v7 clrex instruction">;
-def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
- "Enable VFP2 instructions">;
-def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
- "Enable VFP3 instructions",
- [FeatureVFP2]>;
-def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
- "Enable NEON instructions",
- [FeatureVFP3]>;
-def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
- "Enable Thumb2 instructions">;
-def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
- "Does not support ARM mode execution",
- [ModeThumb]>;
-def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
- "Enable half-precision floating point">;
-def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
- "Enable VFP4 instructions",
- [FeatureVFP3, FeatureFP16]>;
-def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
- "true", "Enable ARMv8 FP",
- [FeatureVFP4]>;
-def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
- "Enable full half-precision floating point",
- [FeatureFPARMv8]>;
-def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
- "Restrict FP to 16 double registers">;
-def FeatureHWDivThumb : SubtargetFeature<"hwdiv", "HasHardwareDivideInThumb",
- "true",
- "Enable divide instructions in Thumb">;
-def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
- "HasHardwareDivideInARM", "true",
- "Enable divide instructions in ARM mode">;
-def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
- "Has data barrier (dmb / dsb) instructions">;
-def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true",
- "Has v7 clrex instruction">;
def FeatureAcquireRelease : SubtargetFeature<"acquire-release",
"HasAcquireRelease", "true",
- "Has v8 acquire/release (lda/ldaex etc) instructions">;
-def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
- "FP compare + branch is slow">;
-def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
- "Floating point unit supports single precision only">;
-def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
- "Enable support for Performance Monitor extensions">;
-def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
- "Enable support for TrustZone security extensions">;
-def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true",
- "Enable support for ARMv8-M Security Extensions">;
-def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
- "Enable support for Cryptography extensions",
- [FeatureNEON]>;
-def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
- "Enable support for CRC instructions">;
+ "Has v8 acquire/release (lda/ldaex "
+ " etc) instructions">;
+
+
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+ "FP compare + branch is slow">;
+
+def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
+ "Enable support for Performance "
+ "Monitor extensions">;
+
+
+// TrustZone Security Extensions
+def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
+ "Enable support for TrustZone "
+ "security extensions">;
+
+def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true",
+ "Enable support for ARMv8-M "
+ "Security Extensions">;
+
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+ "Enable support for "
+ "Cryptography extensions",
+ [FeatureNEON]>;
+
+def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
+ "Enable support for CRC instructions">;
+
+
// Not to be confused with FeatureHasRetAddrStack (return address stack)
-def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
- "Enable Reliability, Availability and Serviceability extensions">;
-def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
- "Enable fast computation of positive address offsets">;
-def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
- "CPU fuses AES crypto operations">;
-
-// Cyclone has preferred instructions for zeroing VFP registers, which can
-// execute in 0 cycles.
-def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
- "Has zero-cycle zeroing instructions">;
-
-// Whether or not it may be profitable to unpredicate certain instructions
-// during if conversion.
+def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
+ "Enable Reliability, Availability "
+ "and Serviceability extensions">;
+
+// Fast computation of non-negative address offsets
+def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
+ "Enable fast computation of "
+ "positive address offsets">;
+
+// Fast execution of AES crypto operations
+def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
+ "CPU fuses AES crypto operations">;
+
+// Cyclone can zero VFP registers in 0 cycles.
+def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
+ "Has zero-cycle zeroing instructions">;
+
+// Whether it is profitable to unpredicate certain instructions during if-conversion
def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr",
- "IsProfitableToUnpredicate",
- "true",
+ "IsProfitableToUnpredicate", "true",
"Is profitable to unpredicate">;
// Some targets (e.g. Swift) have microcoded VGETLNi32.
-def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32",
- "HasSlowVGETLNi32", "true",
- "Has slow VGETLNi32 - prefer VMOV">;
+def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32",
+ "HasSlowVGETLNi32", "true",
+ "Has slow VGETLNi32 - prefer VMOV">;
// Some targets (e.g. Swift) have microcoded VDUP32.
-def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true",
- "Has slow VDUP32 - prefer VMOV">;
+def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32",
+ "true",
+ "Has slow VDUP32 - prefer VMOV">;
// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON
// for scalar FP, as this allows more effective execution domain optimization.
-def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR",
- "true", "Prefer VMOVSR">;
+def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR",
+ "true", "Prefer VMOVSR">;
// Swift has ISHST barriers compatible with Atomic Release semantics but weaker
// than ISH
def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST",
- "true", "Prefer ISHST barriers">;
+ "true", "Prefer ISHST barriers">;
// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU.
-def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", "true",
- "Has muxed AGU and NEON/FPU">;
+def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits",
+ "true",
+ "Has muxed AGU and NEON/FPU">;
-// On some targets, a VLDM/VSTM starting with an odd register number needs more
-// microops than single VLDRS.
+// Whether VLDM/VSTM starting with odd register number need more microops
+// than single VLDRS
def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister",
- "true", "VLDM/VSTM starting with an odd register is slow">;
+ "true", "VLDM/VSTM starting "
+ "with an odd register is slow">;
// Some targets have a renaming dependency when loading into D subregisters.
def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
"SlowLoadDSubregister", "true",
"Loading into D subregs is slow">;
+
// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.
def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
"DontWidenVMOVS", "true",
"Don't widen VMOVS to VMOVD">;
// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions.
-def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", "ExpandMLx", "true",
- "Expand VFP/NEON MLA/MLS instructions">;
+def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx",
+ "ExpandMLx", "true",
+ "Expand VFP/NEON MLA/MLS instructions">;
// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS.
def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards",
@@ -162,15 +190,18 @@ def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards",
// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
// VFP to NEON, as an execution domain optimization.
-def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs",
- "true", "Convert VMOVSR, VMOVRS, VMOVS to NEON">;
+def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs",
+ "UseNEONForFPMovs", "true",
+ "Convert VMOVSR, VMOVRS, "
+ "VMOVS to NEON">;
// Some processors benefit from using NEON instructions for scalar
// single-precision FP operations. This affects instruction selection and should
// only be enabled if the handling of denormals is not important.
-def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
- "true",
- "Use NEON for single precision FP">;
+def FeatureNEONForFP : SubtargetFeature<"neonfp",
+ "UseNEONForSinglePrecisionFP",
+ "true",
+ "Use NEON for single precision FP">;
// On some processors, VLDn instructions that access unaligned data take one
// extra cycle. Take that into account when computing operand latencies.
@@ -181,18 +212,18 @@ def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign",
// Some processors have a nonpipelined VFP coprocessor.
def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp",
"NonpipelinedVFP", "true",
- "VFP instructions are not pipelined">;
+ "VFP instructions are not pipelined">;
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
// to just not use them.
-def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
- "Disable VFP / NEON MAC instructions">;
+def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
+ "Disable VFP / NEON MAC instructions">;
// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
- "HasVMLxForwarding", "true",
- "Has multiplier accumulator forwarding">;
+ "HasVMLxForwarding", "true",
+ "Has multiplier accumulator forwarding">;
// Disable 32-bit to 16-bit narrowing for experimentation.
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
@@ -213,14 +244,16 @@ def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr",
"true",
"Disable +1 predication cost for instructions updating CPSR">;
-def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
- "AvoidMOVsShifterOperand", "true",
- "Avoid movs instructions with shifter operand">;
+def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
+ "AvoidMOVsShifterOperand", "true",
+ "Avoid movs instructions with "
+ "shifter operand">;
// Some processors perform return stack prediction. CodeGen should avoid issue
// "normal" call instructions to callees which do not return.
-def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", "HasRetAddrStack", "true",
- "Has return address stack">;
+def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack",
+ "HasRetAddrStack", "true",
+ "Has return address stack">;
// Some processors have no branch predictor, which changes the expected cost of
// taking a branch which affects the choice of whether to use predicated
@@ -230,63 +263,80 @@ def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor",
"Has no branch predictor">;
/// DSP extension.
-def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
- "Supports DSP instructions in ARM and/or Thumb2">;
+def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
+ "Supports DSP instructions in "
+ "ARM and/or Thumb2">;
// Multiprocessing extension.
-def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
- "Supports Multiprocessing extension">;
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+ "Supports Multiprocessing extension">;
// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8).
def FeatureVirtualization : SubtargetFeature<"virtualization",
- "HasVirtualization", "true",
- "Supports Virtualization extension",
- [FeatureHWDivThumb, FeatureHWDivARM]>;
+ "HasVirtualization", "true",
+ "Supports Virtualization extension",
+ [FeatureHWDivThumb, FeatureHWDivARM]>;
-// M-series ISA
-def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
- "Is microcontroller profile ('M' series)">;
+// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
+// See ARMInstrInfo.td for details.
+def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
+ "NaCl trap">;
-// R-series ISA
-def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass",
- "Is realtime profile ('R' series)">;
+def FeatureStrictAlign : SubtargetFeature<"strict-align",
+ "StrictAlign", "true",
+ "Disallow all unaligned memory "
+ "access">;
+
+def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
+ "Generate calls via indirect call "
+ "instructions">;
+
+def FeatureExecuteOnly : SubtargetFeature<"execute-only",
+ "GenExecuteOnly", "true",
+ "Enable the generation of "
+ "execute only code.">;
+
+def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
+ "Reserve R9, making it unavailable"
+ " as GPR">;
+
+def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
+ "Don't use movt/movw pairs for "
+ "32-bit imms">;
+
+def FeatureNoNegativeImmediates
+ : SubtargetFeature<"no-neg-immediates",
+ "NegativeImmediates", "false",
+ "Convert immediates and instructions "
+ "to their negated or complemented "
+ "equivalent when the immediate does "
+ "not fit in the encoding.">;
+
+
+//===----------------------------------------------------------------------===//
+// ARM architecture class
+//
// A-series ISA
def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
"Is application profile ('A' series)">;
-// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
-// See ARMInstrInfo.td for details.
-def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
- "NaCl trap">;
-
-def FeatureStrictAlign : SubtargetFeature<"strict-align",
- "StrictAlign", "true",
- "Disallow all unaligned memory "
- "access">;
+// R-series ISA
+def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass",
+ "Is realtime profile ('R' series)">;
-def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
- "Generate calls via indirect call "
- "instructions">;
+// M-series ISA
+def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
+ "Is microcontroller profile ('M' series)">;
-def FeatureExecuteOnly
- : SubtargetFeature<"execute-only", "GenExecuteOnly", "true",
- "Enable the generation of execute only code.">;
-def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
- "Reserve R9, making it unavailable as "
- "GPR">;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
+ "Enable Thumb2 instructions">;
-def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
- "Don't use movt/movw pairs for 32-bit "
- "imms">;
+def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
+ "Does not support ARM mode execution",
+ [ModeThumb]>;
-def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
- "NegativeImmediates", "false",
- "Convert immediates and instructions "
- "to their negated or complemented "
- "equivalent when the immediate does "
- "not fit in the encoding.">;
//===----------------------------------------------------------------------===//
// ARM ISAs.
@@ -294,43 +344,57 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
+
def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
"Support ARM v5T instructions",
[HasV4TOps]>;
+
def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true",
- "Support ARM v5TE, v5TEj, and v5TExp instructions",
+ "Support ARM v5TE, v5TEj, and "
+ "v5TExp instructions",
[HasV5TOps]>;
+
def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
"Support ARM v6 instructions",
[HasV5TEOps]>;
+
def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true",
"Support ARM v6M instructions",
[HasV6Ops]>;
+
def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true",
"Support ARM v8M Baseline instructions",
[HasV6MOps]>;
+
def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true",
"Support ARM v6k instructions",
[HasV6Ops]>;
+
def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
"Support ARM v6t2 instructions",
[HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>;
+
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
[HasV6T2Ops, FeaturePerfMon,
FeatureV7Clrex]>;
+
+def HasV8MMainlineOps :
+ SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true",
+ "Support ARM v8M Mainline instructions",
+ [HasV7Ops]>;
+
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
[HasV7Ops, FeatureAcquireRelease]>;
+
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
[HasV8Ops]>;
-def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
"Support ARM v8.2a instructions",
[HasV8_1aOps]>;
-def HasV8MMainlineOps : SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true",
- "Support ARM v8M Mainline instructions",
- [HasV7Ops]>;
//===----------------------------------------------------------------------===//
@@ -386,11 +450,17 @@ def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52",
def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3",
"Cortex-M3 ARM processors", []>;
+
//===----------------------------------------------------------------------===//
-// ARM schedules.
+// ARM Helper classes.
//
-include "ARMSchedule.td"
+class Architecture<string fname, string aname, list<SubtargetFeature> features>
+ : SubtargetFeature<fname, "ARMArch", aname,
+ !strconcat(aname, " architecture"), features>;
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
//===----------------------------------------------------------------------===//
@@ -547,12 +617,21 @@ def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
//===----------------------------------------------------------------------===//
+// ARM schedules.
+//===----------------------------------------------------------------------===//
+//
+include "ARMSchedule.td"
+
+//===----------------------------------------------------------------------===//
// ARM processors
//
// Dummy CPU, used to target architectures
def : ProcessorModel<"generic", CortexA8Model, []>;
+// FIXME: Several processors below are not using their own scheduler
+// model, but that of a similar or previous processor. These should be fixed.
+
def : ProcNoItin<"arm8", [ARMv4]>;
def : ProcNoItin<"arm810", [ARMv4]>;
def : ProcNoItin<"strongarm", [ARMv4]>;
@@ -612,7 +691,6 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2,
FeatureVFP2,
FeatureHasSlowFPVMLx]>;
-// FIXME: A5 has currently the same Schedule model as A8
def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5,
FeatureHasRetAddrStack,
FeatureTrustZone,
@@ -656,7 +734,6 @@ def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
FeatureCheckVLDnAlign,
FeatureMP]>;
-// FIXME: A12 has currently the same Schedule model as A9
def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
FeatureHasRetAddrStack,
FeatureTrustZone,
@@ -666,7 +743,6 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
FeatureVirtualization,
FeatureMP]>;
-// FIXME: A15 has currently the same Schedule model as A9.
def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
FeatureDontWidenVMOVS,
FeatureHasRetAddrStack,
@@ -678,7 +754,6 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
FeatureAvoidPartialCPSR,
FeatureVirtualization]>;
-// FIXME: A17 has currently the same Schedule model as A9
def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
FeatureHasRetAddrStack,
FeatureTrustZone,
@@ -688,9 +763,7 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
FeatureAvoidPartialCPSR,
FeatureVirtualization]>;
-// FIXME: krait has currently the same Schedule model as A9
-// FIXME: krait has currently the same features as A9 plus VFP4 and hardware
-// division features.
+// FIXME: krait has currently the same features as A9 plus VFP4 and HWDiv
def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
FeatureHasRetAddrStack,
FeatureMuxedUnits,
@@ -720,12 +793,10 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureSlowVGETLNi32,
FeatureSlowVDUP32]>;
-// FIXME: R4 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
FeatureHasRetAddrStack,
FeatureAvoidPartialCPSR]>;
-// FIXME: R4F has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
FeatureHasRetAddrStack,
FeatureSlowFPBrcc,
@@ -734,7 +805,6 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
FeatureD16,
FeatureAvoidPartialCPSR]>;
-// FIXME: R5 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
FeatureHasRetAddrStack,
FeatureVFP3,
@@ -744,7 +814,6 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
FeatureHasSlowFPVMLx,
FeatureAvoidPartialCPSR]>;
-// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
FeatureHasRetAddrStack,
FeatureVFP3,
@@ -814,14 +883,14 @@ def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
FeatureCRC,
FeatureFPAO]>;
-def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC,
- FeatureFPAO,
- FeatureAvoidPartialCPSR,
- FeatureCheapPredicableCPSR]>;
+def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureFPAO,
+ FeatureAvoidPartialCPSR,
+ FeatureCheapPredicableCPSR]>;
def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
FeatureHWDivThumb,
@@ -835,7 +904,6 @@ def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
FeatureCrypto,
FeatureCRC]>;
-// Cyclone is very similar to swift
def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
@@ -881,9 +949,7 @@ def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52,
//===----------------------------------------------------------------------===//
include "ARMRegisterInfo.td"
-
include "ARMRegisterBanks.td"
-
include "ARMCallingConv.td"
//===----------------------------------------------------------------------===//
@@ -891,7 +957,6 @@ include "ARMCallingConv.td"
//===----------------------------------------------------------------------===//
include "ARMInstrInfo.td"
-
def ARMInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
@@ -912,7 +977,7 @@ def ARMAsmParserVariant : AsmParserVariant {
}
def ARM : Target {
- // Pull in Instruction Info:
+ // Pull in Instruction Info.
let InstructionSet = ARMInstrInfo;
let AssemblyWriters = [ARMAsmWriter];
let AssemblyParserVariants = [ARMAsmParserVariant];
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index e97a7ce5067f..370c0a7f5c53 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -117,7 +117,7 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
if (CC == CallingConv::GHC)
- // This is academic becase all GHC calls are (supposed to be) tail calls
+ // This is academic because all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
if (STI.isTargetDarwin() && STI.getTargetLowering()->supportSwiftError() &&
@@ -163,7 +163,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
// both or otherwise does not want to enable this optimization, the function
// should return NULL
if (CC == CallingConv::GHC)
- // This is academic becase all GHC calls are (supposed to be) tail calls
+ // This is academic because all GHC calls are (supposed to be) tail calls
return nullptr;
return STI.isTargetDarwin() ? CSR_iOS_ThisReturn_RegMask
: CSR_AAPCS_ThisReturn_RegMask;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 384f80356cc8..bf00ef61c2d1 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -250,8 +250,7 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
return false;
// Look to see if our OptionalDef is defining CPSR or CCR.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
if (MO.getReg() == ARM::CPSR)
*CPSR = true;
@@ -267,8 +266,8 @@ bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
AFI->isThumb2Function())
return MI->isPredicable();
- for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
- if (MCID.OpInfo[i].isPredicate())
+ for (const MCOperandInfo &opInfo : MCID.operands())
+ if (opInfo.isPredicate())
return true;
return false;
@@ -1972,7 +1971,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
break;
}
case CCValAssign::AExt:
- // Intentional fall-through. Handle AExt and ZExt.
+ // Intentional fall-through. Handle AExt and ZExt.
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
@@ -2001,6 +2000,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
assert(VA.getLocVT() == MVT::f64 &&
"Custom lowering for v2f64 args not available");
+ // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
CCValAssign &NextVA = ArgLocs[++i];
assert(VA.isRegLoc() && NextVA.isRegLoc() &&
@@ -2172,8 +2172,8 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(RetOpc));
AddOptionalDefs(MIB);
- for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
- MIB.addReg(RetRegs[i], RegState::Implicit);
+ for (unsigned R : RetRegs)
+ MIB.addReg(R, RegState::Implicit);
return true;
}
@@ -2233,8 +2233,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
ArgRegs.reserve(I->getNumOperands());
ArgVTs.reserve(I->getNumOperands());
ArgFlags.reserve(I->getNumOperands());
- for (unsigned i = 0; i < I->getNumOperands(); ++i) {
- Value *Op = I->getOperand(i);
+ for (Value *Op : I->operands()) {
unsigned Arg = getRegForValue(Op);
if (Arg == 0) return false;
@@ -2278,8 +2277,8 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
MIB.addExternalSymbol(TLI.getLibcallName(Call));
// Add implicit physical register uses to the call.
- for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i], RegState::Implicit);
+ for (unsigned R : RegArgs)
+ MIB.addReg(R, RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
@@ -2423,8 +2422,8 @@ bool ARMFastISel::SelectCall(const Instruction *I,
MIB.addExternalSymbol(IntrMemName, 0);
// Add implicit physical register uses to the call.
- for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i], RegState::Implicit);
+ for (unsigned R : RegArgs)
+ MIB.addReg(R, RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
@@ -2932,13 +2931,12 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
bool Found = false;
bool isZExt;
- for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends);
- i != e; ++i) {
- if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() &&
- (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm &&
- MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) {
+ for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
+ if (FLE.Opc[isThumb2] == MI->getOpcode() &&
+ (uint64_t)FLE.ExpectedImm == Imm &&
+ MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
Found = true;
- isZExt = FoldableLoadExtends[i].isZExt;
+ isZExt = FLE.isZExt;
}
}
if (!Found) return false;
@@ -3057,9 +3055,8 @@ bool ARMFastISel::fastLowerArguments() {
};
const TargetRegisterClass *RC = &ARM::rGPRRegClass;
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I) {
- unsigned ArgNo = I->getArgNo();
+ for (const Argument &Arg : F->args()) {
+ unsigned ArgNo = Arg.getArgNo();
unsigned SrcReg = GPRArgRegs[ArgNo];
unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
@@ -3069,7 +3066,7 @@ bool ARMFastISel::fastLowerArguments() {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY),
ResultReg).addReg(DstReg, getKillRegState(true));
- updateValueMap(&*I, ResultReg);
+ updateValueMap(&Arg, ResultReg);
}
return true;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 7f9fe55a5c38..f75dd4de3f96 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2682,9 +2682,12 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
SDNode *ResNode;
if (Subtarget->isThumb()) {
- SDValue Pred = getAL(CurDAG, dl);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
+ SDValue Ops[] = {
+ CPIdx,
+ getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getEntryNode()
+ };
ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
Ops);
} else {
@@ -2698,6 +2701,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
Ops);
}
+ // Annotate the Node with memory operand information so that MachineInstr
+ // queries work properly. This e.g. gives the register allocation the
+ // required information for rematerialization.
+ MachineFunction& MF = CurDAG->getMachineFunction();
+ MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
+ MemOp[0] = MF.getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(MF),
+ MachineMemOperand::MOLoad, 4, 4);
+
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp+1);
+
ReplaceNode(N, ResNode);
return;
}
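A hedged sketch of the memory-operand annotation added above, assuming the SelectionDAG interfaces of this revision (MachineSDNode::mmo_iterator, MachineFunction::allocateMemRefsArray, and setMemRefs taking an iterator pair); the helper name annotateConstantPoolLoad is illustrative only.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Attach a 4-byte, 4-aligned constant-pool MachineMemOperand to a machine
// node produced by instruction selection, so MachineInstr-level queries
// (in particular rematerialization decisions in the register allocator)
// can see that the node performs a plain constant-pool load.
static void annotateConstantPoolLoad(SelectionDAG &DAG, MachineSDNode *N) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
  MemOp[0] = MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                                     MachineMemOperand::MOLoad,
                                     /*Size=*/4, /*Alignment=*/4);
  N->setMemRefs(MemOp, MemOp + 1);
}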
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 29ef69ad0010..faed6b867e2b 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -722,6 +722,29 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
return false;
break;
}
+ case G_BRCOND: {
+ if (!validReg(MRI, I.getOperand(0).getReg(), 1, ARM::GPRRegBankID)) {
+ DEBUG(dbgs() << "Unsupported condition register for G_BRCOND");
+ return false;
+ }
+
+ // Set the flags.
+ auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri))
+ .addReg(I.getOperand(0).getReg())
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI))
+ return false;
+
+ // Branch conditionally.
+ auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc))
+ .add(I.getOperand(1))
+ .add(predOps(ARMCC::EQ, ARM::CPSR));
+ if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI))
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
default:
return false;
}
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index f23e62595d2e..1c17c07e4cb0 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -66,14 +66,16 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, s32}, Libcall);
}
- // FIXME: Support s8 and s16 as well
- for (unsigned Op : {G_SREM, G_UREM})
+ for (unsigned Op : {G_SREM, G_UREM}) {
+ for (auto Ty : {s8, s16})
+ setAction({Op, Ty}, WidenScalar);
if (ST.hasDivideInARMMode())
setAction({Op, s32}, Lower);
else if (AEABI(ST))
setAction({Op, s32}, Custom);
else
setAction({Op, s32}, Libcall);
+ }
for (unsigned Op : {G_SEXT, G_ZEXT}) {
setAction({Op, s32}, Legal);
@@ -88,6 +90,8 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({G_SELECT, p0}, Legal);
setAction({G_SELECT, 1, s1}, Legal);
+ setAction({G_BRCOND, s1}, Legal);
+
setAction({G_CONSTANT, s32}, Legal);
for (auto Ty : {s1, s8, s16})
setAction({G_CONSTANT, Ty}, WidenScalar);
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 13acea3c28a9..48b02d40b246 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -153,9 +153,7 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
break;
}
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
-
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
if (AP.lowerOperand(MO, MCOp)) {
if (MCOp.isImm() && EncodeImms) {
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index c0c09e8c15af..844930235894 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -259,6 +259,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
case G_SELECT: {
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ (void)Ty;
LLT Ty2 = MRI.getType(MI.getOperand(1).getReg());
(void)Ty2;
assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT");
@@ -282,6 +283,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case G_FCMP: {
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ (void)Ty;
LLT Ty1 = MRI.getType(MI.getOperand(2).getReg());
LLT Ty2 = MRI.getType(MI.getOperand(3).getReg());
(void)Ty2;
@@ -329,6 +331,13 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
&ARM::ValueMappings[ARM::DPR3OpsIdx]});
break;
}
+ case G_BR:
+ OperandsMapping = getOperandsMapping({nullptr});
+ break;
+ case G_BRCOND:
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
+ break;
default:
return getInvalidInstructionMapping();
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index f41da3e8e223..22ce949367f3 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -47,6 +47,8 @@ public:
~ARMBaseTargetMachine() override;
const ARMSubtarget *getSubtargetImpl(const Function &F) const override;
+ // The no argument getSubtargetImpl, while it exists on some targets, is
+ // deprecated and should not be used.
const ARMSubtarget *getSubtargetImpl() const = delete;
bool isLittleEndian() const { return isLittle; }
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index cc7a7c3849bc..81b0aa7f8b98 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -515,8 +515,9 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
+ bool isSelectOp = MI.getOpcode() == BPF::Select;
- assert(MI.getOpcode() == BPF::Select && "Unexpected instr type to insert");
+ assert((isSelectOp || MI.getOpcode() == BPF::Select_Ri) && "Unexpected instr type to insert");
// To "insert" a SELECT instruction, we actually have to insert the diamond
// control-flow pattern. The incoming instruction knows the destination vreg
@@ -548,48 +549,40 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Insert Branch if Flag
unsigned LHS = MI.getOperand(1).getReg();
- unsigned RHS = MI.getOperand(2).getReg();
int CC = MI.getOperand(3).getImm();
+ int NewCC;
switch (CC) {
case ISD::SETGT:
- BuildMI(BB, DL, TII.get(BPF::JSGT_rr))
- .addReg(LHS)
- .addReg(RHS)
- .addMBB(Copy1MBB);
+ NewCC = isSelectOp ? BPF::JSGT_rr : BPF::JSGT_ri;
break;
case ISD::SETUGT:
- BuildMI(BB, DL, TII.get(BPF::JUGT_rr))
- .addReg(LHS)
- .addReg(RHS)
- .addMBB(Copy1MBB);
+ NewCC = isSelectOp ? BPF::JUGT_rr : BPF::JUGT_ri;
break;
case ISD::SETGE:
- BuildMI(BB, DL, TII.get(BPF::JSGE_rr))
- .addReg(LHS)
- .addReg(RHS)
- .addMBB(Copy1MBB);
+ NewCC = isSelectOp ? BPF::JSGE_rr : BPF::JSGE_ri;
break;
case ISD::SETUGE:
- BuildMI(BB, DL, TII.get(BPF::JUGE_rr))
- .addReg(LHS)
- .addReg(RHS)
- .addMBB(Copy1MBB);
+ NewCC = isSelectOp ? BPF::JUGE_rr : BPF::JUGE_ri;
break;
case ISD::SETEQ:
- BuildMI(BB, DL, TII.get(BPF::JEQ_rr))
- .addReg(LHS)
- .addReg(RHS)
- .addMBB(Copy1MBB);
+ NewCC = isSelectOp ? BPF::JEQ_rr : BPF::JEQ_ri;
break;
case ISD::SETNE:
- BuildMI(BB, DL, TII.get(BPF::JNE_rr))
- .addReg(LHS)
- .addReg(RHS)
- .addMBB(Copy1MBB);
+ NewCC = isSelectOp ? BPF::JNE_rr : BPF::JNE_ri;
break;
default:
report_fatal_error("unimplemented select CondCode " + Twine(CC));
}
+ if (isSelectOp)
+ BuildMI(BB, DL, TII.get(NewCC))
+ .addReg(LHS)
+ .addReg(MI.getOperand(2).getReg())
+ .addMBB(Copy1MBB);
+ else
+ BuildMI(BB, DL, TII.get(NewCC))
+ .addReg(LHS)
+ .addImm(MI.getOperand(2).getImm())
+ .addMBB(Copy1MBB);
// Copy0MBB:
// %FalseValue = ...
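A condensed sketch of the branch-opcode choice above, assuming the BPF compare-and-branch enumerators named in this diff (BPF::JSGT_rr/BPF::JSGT_ri and friends, from the target's generated instruction info); the helper name pickBranchOpcode is illustrative only. For the new Select_Ri pseudo the right-hand operand is an immediate, so each register/register opcode is swapped for its register/immediate twin.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Support/ErrorHandling.h"

// Map the select's condition code to the compare-and-branch opcode used to
// build the diamond; RHSIsReg distinguishes the register form (Select) from
// the new immediate form (Select_Ri).
static unsigned pickBranchOpcode(int CC, bool RHSIsReg) {
  switch (CC) {
  case llvm::ISD::SETGT:  return RHSIsReg ? BPF::JSGT_rr : BPF::JSGT_ri;
  case llvm::ISD::SETUGT: return RHSIsReg ? BPF::JUGT_rr : BPF::JUGT_ri;
  case llvm::ISD::SETGE:  return RHSIsReg ? BPF::JSGE_rr : BPF::JSGE_ri;
  case llvm::ISD::SETUGE: return RHSIsReg ? BPF::JUGE_rr : BPF::JUGE_ri;
  case llvm::ISD::SETEQ:  return RHSIsReg ? BPF::JEQ_rr  : BPF::JEQ_ri;
  case llvm::ISD::SETNE:  return RHSIsReg ? BPF::JNE_rr  : BPF::JNE_ri;
  default:
    llvm_unreachable("unimplemented select CondCode");
  }
}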
diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td
index 5ad777268208..f68357809add 100644
--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@@ -460,6 +460,11 @@ let usesCustomInserter = 1 in {
"# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2",
[(set i64:$dst,
(BPFselectcc i64:$lhs, i64:$rhs, (i64 imm:$imm), i64:$src, i64:$src2))]>;
+ def Select_Ri : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, i64imm:$rhs, i64imm:$imm, GPR:$src, GPR:$src2),
+ "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2",
+ [(set i64:$dst,
+ (BPFselectcc i64:$lhs, (i64 imm:$rhs), (i64 imm:$imm), i64:$src, i64:$src2))]>;
}
// load 64-bit global addr into register
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index b064778c4bbd..d75d95a6baea 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -7,8 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hexbit"
-
#include "HexagonBitTracker.h"
#include "HexagonTargetMachine.h"
#include "llvm/ADT/BitVector.h"
@@ -42,6 +40,8 @@
#include <utility>
#include <vector>
+#define DEBUG_TYPE "hexbit"
+
using namespace llvm;
static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
diff --git a/lib/Target/Hexagon/HexagonDepInstrInfo.td b/lib/Target/Hexagon/HexagonDepInstrInfo.td
index 2dc74632e9be..30ebf89c9808 100644
--- a/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -45863,6 +45863,7 @@ tc_30665cb0, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000000;
let isSoloAin1 = 1;
+let hasSideEffects = 1;
}
def Y2_dccleaninva : HInst<
(outs),
@@ -45872,6 +45873,7 @@ tc_30665cb0, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000010;
let isSoloAin1 = 1;
+let hasSideEffects = 1;
}
def Y2_dcfetch : HInst<
(outs),
@@ -45900,6 +45902,7 @@ tc_30665cb0, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000001;
let isSoloAin1 = 1;
+let hasSideEffects = 1;
}
def Y2_dczeroa : HInst<
(outs),
@@ -45909,6 +45912,7 @@ tc_30665cb0, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000110;
let isSoloAin1 = 1;
+let hasSideEffects = 1;
let mayStore = 1;
}
def Y2_icinva : HInst<
diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 03c4a83594b3..80361015e649 100644
--- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -59,8 +59,6 @@
// J2_jump <BB#6>, %PC<imp-def,dead>
// Successors according to CFG: BB#6 BB#3
-#define DEBUG_TYPE "hexagon-eif"
-
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
@@ -90,6 +88,8 @@
#include <cassert>
#include <iterator>
+#define DEBUG_TYPE "hexagon-eif"
+
using namespace llvm;
namespace llvm {
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 734f3c6658d9..a2f6dd68c1a1 100644
--- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -86,8 +86,6 @@
// however, is that finding the locations where the implicit uses need
// to be added, and updating the live ranges will be more involved.
-#define DEBUG_TYPE "expand-condsets"
-
#include "HexagonInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
@@ -116,6 +114,8 @@
#include <set>
#include <utility>
+#define DEBUG_TYPE "expand-condsets"
+
using namespace llvm;
static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit",
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index c790579ccebc..e5e75198b2d1 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -8,8 +8,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hexagon-pei"
-
#include "HexagonFrameLowering.h"
#include "HexagonBlockRanges.h"
#include "HexagonInstrInfo.h"
@@ -63,6 +61,8 @@
#include <utility>
#include <vector>
+#define DEBUG_TYPE "hexagon-pei"
+
// Hexagon stack frame layout as defined by the ABI:
//
// Incoming arguments
diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp
index bf31e1699284..0a955aedaf1a 100644
--- a/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -7,8 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hexinsert"
-
#include "BitTracker.h"
#include "HexagonBitTracker.h"
#include "HexagonInstrInfo.h"
@@ -44,6 +42,8 @@
#include <utility>
#include <vector>
+#define DEBUG_TYPE "hexinsert"
+
using namespace llvm;
static cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U),
diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp
index 3470480d607d..2da211563e0a 100644
--- a/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -7,8 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "gen-pred"
-
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/SetVector.h"
@@ -35,6 +33,8 @@
#include <set>
#include <utility>
+#define DEBUG_TYPE "gen-pred"
+
using namespace llvm;
namespace llvm {
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 67242764d453..3997702bc962 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1286,16 +1286,6 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV));
}
-// Creates a SPLAT instruction for a constant value VAL.
-static SDValue createSplat(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
- SDValue Val) {
- EVT T = VT.getVectorElementType();
- if (T == MVT::i8 || T == MVT::i16)
- return DAG.getNode(HexagonISD::VSPLAT, dl, VT, Val);
-
- return SDValue();
-}
-
static bool isSExtFree(SDValue N) {
// A sign-extend of a truncate of a sign-extend is free.
if (N.getOpcode() == ISD::TRUNCATE &&
@@ -1374,79 +1364,6 @@ HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-// Handle only specific vector loads.
-SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
- LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
- SDValue Chain = LoadNode->getChain();
- SDValue Ptr = Op.getOperand(1);
- SDValue LoweredLoad;
- SDValue Result;
- SDValue Base = LoadNode->getBasePtr();
- ISD::LoadExtType Ext = LoadNode->getExtensionType();
- unsigned Alignment = LoadNode->getAlignment();
- SDValue LoadChain;
-
- if(Ext == ISD::NON_EXTLOAD)
- Ext = ISD::ZEXTLOAD;
-
- if (VT == MVT::v4i16) {
- if (Alignment == 2) {
- SDValue Loads[4];
- // Base load.
- Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base,
- LoadNode->getPointerInfo(), MVT::i16, Alignment,
- LoadNode->getMemOperand()->getFlags());
- // Base+2 load.
- SDValue Increment = DAG.getConstant(2, DL, MVT::i32);
- Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
- Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
- LoadNode->getPointerInfo(), MVT::i16, Alignment,
- LoadNode->getMemOperand()->getFlags());
- // SHL 16, then OR base and base+2.
- SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32);
- SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount);
- SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]);
- // Base + 4.
- Increment = DAG.getConstant(4, DL, MVT::i32);
- Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
- Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
- LoadNode->getPointerInfo(), MVT::i16, Alignment,
- LoadNode->getMemOperand()->getFlags());
- // Base + 6.
- Increment = DAG.getConstant(6, DL, MVT::i32);
- Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
- Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
- LoadNode->getPointerInfo(), MVT::i16, Alignment,
- LoadNode->getMemOperand()->getFlags());
- // SHL 16, then OR base+4 and base+6.
- Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount);
- SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]);
- // Combine to i64. This could be optimised out later if we can
- // affect reg allocation of this code.
- Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2);
- LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- Loads[0].getValue(1), Loads[1].getValue(1),
- Loads[2].getValue(1), Loads[3].getValue(1));
- } else {
- // Perform default type expansion.
- Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(),
- LoadNode->getAlignment(),
- LoadNode->getMemOperand()->getFlags());
- LoadChain = Result.getValue(1);
- }
- } else
- llvm_unreachable("Custom lowering unsupported load");
-
- Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
- // Since we pretend to lower a load, we need the original chain
- // info attached to the result.
- SDValue Ops[] = { Result, LoadChain };
-
- return DAG.getMergeValues(Ops, DL);
-}
-
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
@@ -1971,18 +1888,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Handling of vector operations.
//
- // Custom lower v4i16 load only. Let v4i16 store to be
- // promoted for now.
promoteLdStType(MVT::v4i8, MVT::i32);
promoteLdStType(MVT::v2i16, MVT::i32);
promoteLdStType(MVT::v8i8, MVT::i64);
+ promoteLdStType(MVT::v4i16, MVT::i64);
promoteLdStType(MVT::v2i32, MVT::i64);
- setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
- setOperationAction(ISD::STORE, MVT::v4i16, Promote);
- AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64);
- AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64);
-
// Set the action for vector operations to "expand", then override it with
// either "custom" or "legal" for specific cases.
static const unsigned VectExpOps[] = {
@@ -2301,7 +2212,8 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE";
- case HexagonISD::VPACK: return "HexagonISD::VPACK";
+ case HexagonISD::VPACKE: return "HexagonISD::VPACKE";
+ case HexagonISD::VPACKO: return "HexagonISD::VPACKO";
case HexagonISD::VASL: return "HexagonISD::VASL";
case HexagonISD::VASR: return "HexagonISD::VASR";
case HexagonISD::VLSR: return "HexagonISD::VLSR";
@@ -2394,7 +2306,7 @@ HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
// Test if V1 is a SCALAR_TO_VECTOR.
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
- return createSplat(DAG, dl, VT, V1.getOperand(0));
+ return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0));
// Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
// (and probably will turn into a SCALAR_TO_VECTOR once legalization
@@ -2409,28 +2321,26 @@ HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
}
}
if (IsScalarToVector)
- return createSplat(DAG, dl, VT, V1.getOperand(0));
+ return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0));
}
- return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32));
+ return DAG.getNode(HexagonISD::VSPLAT, dl, VT,
+ DAG.getConstant(Lane, dl, MVT::i32));
}
if (UseHVX) {
ArrayRef<int> Mask = SVN->getMask();
size_t MaskLen = Mask.size();
- int ElemSizeInBits = VT.getScalarSizeInBits();
- if ((Subtarget.useHVXSglOps() && (ElemSizeInBits * MaskLen) == 64 * 8) ||
- (Subtarget.useHVXDblOps() && (ElemSizeInBits * MaskLen) == 128 * 8)) {
- // Return 1 for odd and 2 of even
- StridedLoadKind Pattern = isStridedLoad(Mask);
+ unsigned SizeInBits = VT.getScalarSizeInBits() * MaskLen;
+ if ((Subtarget.useHVXSglOps() && SizeInBits == 64 * 8) ||
+ (Subtarget.useHVXDblOps() && SizeInBits == 128 * 8)) {
+ StridedLoadKind Pattern = isStridedLoad(Mask);
if (Pattern == StridedLoadKind::NoPattern)
return SDValue();
- SDValue Vec0 = Op.getOperand(0);
- SDValue Vec1 = Op.getOperand(1);
- SDValue StridePattern = DAG.getConstant(Pattern, dl, MVT::i32);
- SDValue Ops[] = { Vec1, Vec0, StridePattern };
- return DAG.getNode(HexagonISD::VPACK, dl, VT, Ops);
+ unsigned Opc = Pattern == StridedLoadKind::Even ? HexagonISD::VPACKE
+ : HexagonISD::VPACKO;
+ return DAG.getNode(Opc, dl, VT, {Op.getOperand(1), Op.getOperand(0)});
}
// We used to assert in the "else" part here, but that is bad for Halide
// Halide creates intermediate double registers by interleaving two
@@ -2531,19 +2441,26 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (Size > 64)
return SDValue();
- APInt APSplatBits, APSplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
unsigned NElts = BVN->getNumOperands();
// Try to generate a SPLAT instruction.
- if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) &&
- (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, 0, true) && SplatBitSize <= 16)) {
- unsigned SplatBits = APSplatBits.getZExtValue();
- int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >>
- (32 - SplatBitSize));
- return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, dl, MVT::i32));
+ if (VT == MVT::v4i8 || VT == MVT::v4i16 || VT == MVT::v2i32) {
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, false)) {
+ if (SplatBitSize == VT.getVectorElementType().getSizeInBits()) {
+ unsigned ZV = APSplatBits.getZExtValue();
+ assert(SplatBitSize <= 32 && "Can only handle up to i32");
+ // Sign-extend the splat value from SplatBitSize to 32.
+ int32_t SV = SplatBitSize < 32
+ ? int32_t(ZV << (32-SplatBitSize)) >> (32-SplatBitSize)
+ : int32_t(ZV);
+ return DAG.getNode(HexagonISD::VSPLAT, dl, VT,
+ DAG.getConstant(SV, dl, MVT::i32));
+ }
+ }
}
// Try to generate COMBINE to build v2i32 vectors.
@@ -2974,8 +2891,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
- // Custom lower some vector loads.
- case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index bfd2c94eeaba..d66cbc95e918 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -62,7 +62,8 @@ namespace HexagonISD {
EXTRACTU,
EXTRACTURP,
VCOMBINE,
- VPACK,
+ VPACKE,
+ VPACKO,
TC_RETURN,
EH_RETURN,
DCFETCH,
@@ -164,7 +165,6 @@ namespace HexagonISD {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
bool CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index c611857ec26a..104a28654dd5 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -1366,6 +1366,18 @@ defm : MaskedStore <V6_vS32b_nqpred_ai, int_hexagon_V6_vmaskedstorenq>;
defm : MaskedStore <V6_vS32b_nt_qpred_ai, int_hexagon_V6_vmaskedstorentq>;
defm : MaskedStore <V6_vS32b_nt_nqpred_ai, int_hexagon_V6_vmaskedstorentnq>;
+//*******************************************************************
+// SYSTEM
+//*******************************************************************
+
+def: T_R_pat<Y2_dccleana, int_hexagon_Y2_dccleana>;
+def: T_R_pat<Y2_dccleaninva, int_hexagon_Y2_dccleaninva>;
+def: T_R_pat<Y2_dcinva, int_hexagon_Y2_dcinva>;
+def: T_R_pat<Y2_dczeroa, int_hexagon_Y2_dczeroa>;
+
+def: T_RR_pat<Y4_l2fetch, int_hexagon_Y4_l2fetch>;
+def: T_RP_pat<Y5_l2fetch, int_hexagon_Y5_l2fetch>;
+
include "HexagonIntrinsicsV3.td"
include "HexagonIntrinsicsV4.td"
include "HexagonIntrinsicsV5.td"
diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index a331c978f59d..374ffa3799b0 100644
--- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -10,8 +10,6 @@
// load/store instructions.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "opt-addr-mode"
-
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
@@ -36,6 +34,8 @@
#include <cassert>
#include <cstdint>
+#define DEBUG_TYPE "opt-addr-mode"
+
static cl::opt<int> CodeGrowthLimit("hexagon-amode-growth-limit",
cl::Hidden, cl::init(0), cl::desc("Code growth limit for address mode "
"optimization"));
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index ba98b8994937..804a547d5b33 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -2250,6 +2250,12 @@ def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, PS_storerhabs>;
def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, PS_storeriabs>;
def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, PS_storerdabs>;
+// Prefer this pattern to S2_asl_i_p_or for the special case of joining
+// two 32-bit words into a 64-bit word.
+let AddedComplexity = 200 in
+def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)),
+ (A2_combinew I32:$a, I32:$b)>;
+
def: Pat<(or (or (or (shl (i64 (zext (and I32:$b, (i32 65535)))), (i32 16)),
(i64 (zext (i32 (and I32:$a, (i32 65535)))))),
(shl (i64 (anyext (and I32:$c, (i32 65535)))), (i32 32))),
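
The A2_combinew pattern added above joins two 32-bit words into one 64-bit word; a minimal standalone sketch of the value it computes (illustrative only, not part of the patch):

    // Sketch of the value matched by (or (shl (Aext64 a), 32), (Zext64 b)):
    // the high word comes from $a, the low word from $b.
    #include <cstdint>

    uint64_t combinew(uint32_t a, uint32_t b) {
      return (uint64_t(a) << 32) | uint64_t(b);
    }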
@@ -2971,45 +2977,40 @@ def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs),
(V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
Requires<[UseHVXDbl]>;
-def SDTHexagonVPACK: SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>,
- SDTCisInt<3>]>;
-
-def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>;
-
-// 0 as the last argument denotes vpacke. 1 denotes vpacko
-def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs),
- (v64i8 VectorRegs:$Vt), (i32 0))),
- (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>,
- Requires<[UseHVXSgl]>;
-def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs),
- (v64i8 VectorRegs:$Vt), (i32 1))),
- (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>,
- Requires<[UseHVXSgl]>;
-def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs),
- (v32i16 VectorRegs:$Vt), (i32 0))),
- (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>,
- Requires<[UseHVXSgl]>;
-def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs),
- (v32i16 VectorRegs:$Vt), (i32 1))),
- (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>,
- Requires<[UseHVXSgl]>;
-
-def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs),
- (v128i8 VecDblRegs:$Vt), (i32 0))),
- (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
- Requires<[UseHVXDbl]>;
-def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs),
- (v128i8 VecDblRegs:$Vt), (i32 1))),
- (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
- Requires<[UseHVXDbl]>;
-def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs),
- (v64i16 VecDblRegs:$Vt), (i32 0))),
- (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
- Requires<[UseHVXDbl]>;
-def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs),
- (v64i16 VecDblRegs:$Vt), (i32 1))),
- (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
- Requires<[UseHVXDbl]>;
+def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>;
+
+def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>;
+def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>;
+
+let Predicates = [UseHVXSgl] in {
+ def: Pat<(v64i8 (HexagonVPACKE (v64i8 VectorRegs:$Vs),
+ (v64i8 VectorRegs:$Vt))),
+ (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>;
+ def: Pat<(v64i8 (HexagonVPACKO (v64i8 VectorRegs:$Vs),
+ (v64i8 VectorRegs:$Vt))),
+ (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>;
+ def: Pat<(v32i16 (HexagonVPACKE (v32i16 VectorRegs:$Vs),
+ (v32i16 VectorRegs:$Vt))),
+ (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>;
+ def: Pat<(v32i16 (HexagonVPACKO (v32i16 VectorRegs:$Vs),
+ (v32i16 VectorRegs:$Vt))),
+ (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>;
+}
+
+let Predicates = [UseHVXDbl] in {
+ def: Pat<(v128i8 (HexagonVPACKE (v128i8 VecDblRegs:$Vs),
+ (v128i8 VecDblRegs:$Vt))),
+ (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>;
+ def: Pat<(v128i8 (HexagonVPACKO (v128i8 VecDblRegs:$Vs),
+ (v128i8 VecDblRegs:$Vt))),
+ (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>;
+ def: Pat<(v64i16 (HexagonVPACKE (v64i16 VecDblRegs:$Vs),
+ (v64i16 VecDblRegs:$Vt))),
+ (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>;
+ def: Pat<(v64i16 (HexagonVPACKO (v64i16 VecDblRegs:$Vs),
+ (v64i16 VecDblRegs:$Vt))),
+ (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>;
+}
def V2I1: PatLeaf<(v2i1 PredRegs:$R)>;
def V4I1: PatLeaf<(v4i1 PredRegs:$R)>;
@@ -3073,6 +3074,10 @@ def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
// four halfwords of 64-bits destination register.
def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
+def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)),
+ (A2_combineii imm:$s8, imm:$s8)>;
+def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (A2_combinew I32:$Rs, I32:$Rs)>;
+
class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
: Pat <(Op Type:$Rss, Type:$Rtt),
@@ -3099,14 +3104,11 @@ def: VArith_pat <A2_xorp, xor, V8I8>;
def: VArith_pat <A2_xorp, xor, V4I16>;
def: VArith_pat <A2_xorp, xor, V2I32>;
-def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
- (i32 u5_0ImmPred:$c))))),
+def: Pat<(v2i32 (sra V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))),
(S2_asr_i_vw V2I32:$b, imm:$c)>;
-def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
- (i32 u5_0ImmPred:$c))))),
+def: Pat<(v2i32 (srl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))),
(S2_lsr_i_vw V2I32:$b, imm:$c)>;
-def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
- (i32 u5_0ImmPred:$c))))),
+def: Pat<(v2i32 (shl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))),
(S2_asl_i_vw V2I32:$b, imm:$c)>;
def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))),
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 34df2ebcc520..ea86c9c42f47 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -53,6 +53,10 @@ static cl::opt<bool>
EmitJtInText("hexagon-emit-jt-text", cl::Hidden, cl::init(false),
cl::desc("Emit hexagon jump tables in function section"));
+static cl::opt<bool>
+ EmitLutInText("hexagon-emit-lut-text", cl::Hidden, cl::init(false),
+ cl::desc("Emit hexagon lookup tables in function section"));
+
// TraceGVPlacement controls messages for all builds. For builds with assertions
// (debug or release), messages are also controlled by the usual debug flags
// (e.g. -debug and -debug-only=globallayout)
@@ -136,6 +140,13 @@ MCSection *HexagonTargetObjectFile::SelectSectionForGlobal(
<< (Kind.isBSS() ? "kind_bss " : "" )
<< (Kind.isBSSLocal() ? "kind_bss_local " : "" ));
+ // If the lookup table is used by more than one function, do not place
+  // it in the text section.
+ if (EmitLutInText && GO->getName().startswith("switch.table")) {
+ if (const Function *Fn = getLutUsedFunction(GO))
+ return selectSectionForLookupTable(GO, TM, Fn);
+ }
+
if (isGlobalInSmallSection(GO, TM))
return selectSmallSectionForGlobal(GO, Kind, TM);
@@ -402,3 +413,39 @@ MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal(
// Otherwise, we work the same as ELF.
return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM);
}
+
+// Return the function that uses the lookup table. If there is more
+// than one live function that uses this lookup table, bail out and place
+// the lookup table in the default section.
+const Function *
+HexagonTargetObjectFile::getLutUsedFunction(const GlobalObject *GO) const {
+ const Function *ReturnFn = nullptr;
+ for (auto U : GO->users()) {
+    // Validate that each user is an instruction in a live function.
+ auto *I = dyn_cast<Instruction>(U);
+ if (!I)
+ continue;
+ auto *Bb = I->getParent();
+ if (!Bb)
+ continue;
+ auto *UserFn = Bb->getParent();
+ if (!ReturnFn)
+ ReturnFn = UserFn;
+ else if (ReturnFn != UserFn)
+ return nullptr;
+ }
+ return ReturnFn;
+}
+
+MCSection *HexagonTargetObjectFile::selectSectionForLookupTable(
+ const GlobalObject *GO, const TargetMachine &TM, const Function *Fn) const {
+
+ SectionKind Kind = SectionKind::getText();
+  // If the function has an explicit section, place the lookup table in this
+ // explicit section.
+ if (Fn->hasSection())
+ return getExplicitSectionGlobal(Fn, Kind, TM);
+
+ const auto *FuncObj = dyn_cast<GlobalObject>(Fn);
+ return SelectSectionForGlobal(FuncObj, Kind, TM);
+}
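
The uniqueness check that getLutUsedFunction performs above reduces to the following sketch, written with placeholder types rather than the real LLVM classes (illustrative only):

    #include <vector>

    struct Function;                    // stands in for llvm::Function
    struct Use { Function *Parent; };   // an instruction user and its enclosing function

    // Return the single function that uses the table, or nullptr if the users
    // span more than one function (or there are no instruction users at all).
    Function *uniqueUser(const std::vector<Use> &Users) {
      Function *Ret = nullptr;
      for (const Use &U : Users) {
        if (!U.Parent)
          continue;
        if (!Ret)
          Ret = U.Parent;
        else if (Ret != U.Parent)
          return nullptr;
      }
      return Ret;
    }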
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
index 373d850b53be..eff44f097e03 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -36,6 +36,8 @@ namespace llvm {
bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference,
const Function &F) const override;
+ const Function *getLutUsedFunction(const GlobalObject *GO) const;
+
private:
MCSectionELF *SmallDataSection;
MCSectionELF *SmallBSSSection;
@@ -46,6 +48,10 @@ namespace llvm {
MCSection *selectSmallSectionForGlobal(const GlobalObject *GO,
SectionKind Kind,
const TargetMachine &TM) const;
+
+ MCSection *selectSectionForLookupTable(const GlobalObject *GO,
+ const TargetMachine &TM,
+ const Function *Fn) const;
};
} // namespace llvm
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index b72c9d534478..e12188e70602 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -304,9 +304,6 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
- bool expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
- const MCSubtargetInfo *STI);
-
bool reportParseError(Twine ErrorMsg);
bool reportParseError(SMLoc Loc, Twine ErrorMsg);
@@ -2514,16 +2511,6 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return expandSeq(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::SEQIMacro:
return expandSeqI(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
- case Mips::MFTC0: case Mips::MTTC0:
- case Mips::MFTGPR: case Mips::MTTGPR:
- case Mips::MFTLO: case Mips::MTTLO:
- case Mips::MFTHI: case Mips::MTTHI:
- case Mips::MFTACX: case Mips::MTTACX:
- case Mips::MFTDSP: case Mips::MTTDSP:
- case Mips::MFTC1: case Mips::MTTC1:
- case Mips::MFTHC1: case Mips::MTTHC1:
- case Mips::CFTC1: case Mips::CTTC1:
- return expandMXTRAlias(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
}
}
@@ -4895,212 +4882,6 @@ bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return false;
}
-// Map the DSP accumulator and control register to the corresponding gpr
-// operand. Unlike the other alias, the m(f|t)t(lo|hi|acx) instructions
-// do not map the DSP registers contigously to gpr registers.
-static unsigned getRegisterForMxtrDSP(MCInst &Inst, bool IsMFDSP) {
- switch (Inst.getOpcode()) {
- case Mips::MFTLO:
- case Mips::MTTLO:
- switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) {
- case Mips::AC0:
- return Mips::ZERO;
- case Mips::AC1:
- return Mips::A0;
- case Mips::AC2:
- return Mips::T0;
- case Mips::AC3:
- return Mips::T4;
- default:
- llvm_unreachable("Unknown register for 'mttr' alias!");
- }
- case Mips::MFTHI:
- case Mips::MTTHI:
- switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) {
- case Mips::AC0:
- return Mips::AT;
- case Mips::AC1:
- return Mips::A1;
- case Mips::AC2:
- return Mips::T1;
- case Mips::AC3:
- return Mips::T5;
- default:
- llvm_unreachable("Unknown register for 'mttr' alias!");
- }
- case Mips::MFTACX:
- case Mips::MTTACX:
- switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) {
- case Mips::AC0:
- return Mips::V0;
- case Mips::AC1:
- return Mips::A2;
- case Mips::AC2:
- return Mips::T2;
- case Mips::AC3:
- return Mips::T6;
- default:
- llvm_unreachable("Unknown register for 'mttr' alias!");
- }
- case Mips::MFTDSP:
- case Mips::MTTDSP:
- return Mips::S0;
- default:
- llvm_unreachable("Unknown instruction for 'mttr' dsp alias!");
- }
-}
-
-// Map the floating point register operand to the corresponding register
-// operand.
-static unsigned getRegisterForMxtrFP(MCInst &Inst, bool IsMFTC1) {
- switch (Inst.getOperand(IsMFTC1 ? 1 : 0).getReg()) {
- case Mips::F0: return Mips::ZERO;
- case Mips::F1: return Mips::AT;
- case Mips::F2: return Mips::V0;
- case Mips::F3: return Mips::V1;
- case Mips::F4: return Mips::A0;
- case Mips::F5: return Mips::A1;
- case Mips::F6: return Mips::A2;
- case Mips::F7: return Mips::A3;
- case Mips::F8: return Mips::T0;
- case Mips::F9: return Mips::T1;
- case Mips::F10: return Mips::T2;
- case Mips::F11: return Mips::T3;
- case Mips::F12: return Mips::T4;
- case Mips::F13: return Mips::T5;
- case Mips::F14: return Mips::T6;
- case Mips::F15: return Mips::T7;
- case Mips::F16: return Mips::S0;
- case Mips::F17: return Mips::S1;
- case Mips::F18: return Mips::S2;
- case Mips::F19: return Mips::S3;
- case Mips::F20: return Mips::S4;
- case Mips::F21: return Mips::S5;
- case Mips::F22: return Mips::S6;
- case Mips::F23: return Mips::S7;
- case Mips::F24: return Mips::T8;
- case Mips::F25: return Mips::T9;
- case Mips::F26: return Mips::K0;
- case Mips::F27: return Mips::K1;
- case Mips::F28: return Mips::GP;
- case Mips::F29: return Mips::SP;
- case Mips::F30: return Mips::FP;
- case Mips::F31: return Mips::RA;
- default: llvm_unreachable("Unknown register for mttc1 alias!");
- }
-}
-
-// Map the coprocessor operand the corresponding gpr register operand.
-static unsigned getRegisterForMxtrC0(MCInst &Inst, bool IsMFTC0) {
- switch (Inst.getOperand(IsMFTC0 ? 1 : 0).getReg()) {
- case Mips::COP00: return Mips::ZERO;
- case Mips::COP01: return Mips::AT;
- case Mips::COP02: return Mips::V0;
- case Mips::COP03: return Mips::V1;
- case Mips::COP04: return Mips::A0;
- case Mips::COP05: return Mips::A1;
- case Mips::COP06: return Mips::A2;
- case Mips::COP07: return Mips::A3;
- case Mips::COP08: return Mips::T0;
- case Mips::COP09: return Mips::T1;
- case Mips::COP010: return Mips::T2;
- case Mips::COP011: return Mips::T3;
- case Mips::COP012: return Mips::T4;
- case Mips::COP013: return Mips::T5;
- case Mips::COP014: return Mips::T6;
- case Mips::COP015: return Mips::T7;
- case Mips::COP016: return Mips::S0;
- case Mips::COP017: return Mips::S1;
- case Mips::COP018: return Mips::S2;
- case Mips::COP019: return Mips::S3;
- case Mips::COP020: return Mips::S4;
- case Mips::COP021: return Mips::S5;
- case Mips::COP022: return Mips::S6;
- case Mips::COP023: return Mips::S7;
- case Mips::COP024: return Mips::T8;
- case Mips::COP025: return Mips::T9;
- case Mips::COP026: return Mips::K0;
- case Mips::COP027: return Mips::K1;
- case Mips::COP028: return Mips::GP;
- case Mips::COP029: return Mips::SP;
- case Mips::COP030: return Mips::FP;
- case Mips::COP031: return Mips::RA;
- default: llvm_unreachable("Unknown register for mttc0 alias!");
- }
-}
-
-/// Expand an alias of 'mftr' or 'mttr' into the full instruction, by producing
-/// an mftr or mttr with the correctly mapped gpr register, u, sel and h bits.
-bool MipsAsmParser::expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
- const MCSubtargetInfo *STI) {
- MipsTargetStreamer &TOut = getTargetStreamer();
- unsigned rd = 0;
- unsigned u = 1;
- unsigned sel = 0;
- unsigned h = 0;
- bool IsMFTR = false;
- switch (Inst.getOpcode()) {
- case Mips::MFTC0:
- IsMFTR = true;
- LLVM_FALLTHROUGH;
- case Mips::MTTC0:
- u = 0;
- rd = getRegisterForMxtrC0(Inst, IsMFTR);
- sel = Inst.getOperand(2).getImm();
- break;
- case Mips::MFTGPR:
- IsMFTR = true;
- LLVM_FALLTHROUGH;
- case Mips::MTTGPR:
- rd = Inst.getOperand(IsMFTR ? 1 : 0).getReg();
- break;
- case Mips::MFTLO:
- case Mips::MFTHI:
- case Mips::MFTACX:
- case Mips::MFTDSP:
- IsMFTR = true;
- LLVM_FALLTHROUGH;
- case Mips::MTTLO:
- case Mips::MTTHI:
- case Mips::MTTACX:
- case Mips::MTTDSP:
- rd = getRegisterForMxtrDSP(Inst, IsMFTR);
- sel = 1;
- break;
- case Mips::MFTHC1:
- h = 1;
- LLVM_FALLTHROUGH;
- case Mips::MFTC1:
- IsMFTR = true;
- rd = getRegisterForMxtrFP(Inst, IsMFTR);
- sel = 2;
- break;
- case Mips::MTTHC1:
- h = 1;
- LLVM_FALLTHROUGH;
- case Mips::MTTC1:
- rd = getRegisterForMxtrFP(Inst, IsMFTR);
- sel = 2;
- break;
- case Mips::CFTC1:
- IsMFTR = true;
- LLVM_FALLTHROUGH;
- case Mips::CTTC1:
- rd = getRegisterForMxtrFP(Inst, IsMFTR);
- sel = 3;
- break;
- }
- unsigned Op0 = IsMFTR ? Inst.getOperand(0).getReg() : rd;
- unsigned Op1 =
- IsMFTR ? rd
- : (Inst.getOpcode() != Mips::MTTDSP ? Inst.getOperand(1).getReg()
- : Inst.getOperand(0).getReg());
-
- TOut.emitRRIII(IsMFTR ? Mips::MFTR : Mips::MTTR, Op0, Op1, u, sel, h, IDLoc,
- STI);
- return false;
-}
-
unsigned
MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst,
const OperandVector &Operands) {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 7caeb08589af..2907b7715857 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -193,21 +193,6 @@ void MipsTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1,
emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI);
}
-void MipsTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0,
- unsigned Reg1, int16_t Imm0, int16_t Imm1,
- int16_t Imm2, SMLoc IDLoc,
- const MCSubtargetInfo *STI) {
- MCInst TmpInst;
- TmpInst.setOpcode(Opcode);
- TmpInst.addOperand(MCOperand::createReg(Reg0));
- TmpInst.addOperand(MCOperand::createReg(Reg1));
- TmpInst.addOperand(MCOperand::createImm(Imm0));
- TmpInst.addOperand(MCOperand::createImm(Imm1));
- TmpInst.addOperand(MCOperand::createImm(Imm2));
- TmpInst.setLoc(IDLoc);
- getStreamer().EmitInstruction(TmpInst, *STI);
-}
-
void MipsTargetStreamer::emitAddu(unsigned DstReg, unsigned SrcReg,
unsigned TrgReg, bool Is64Bit,
const MCSubtargetInfo *STI) {
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index d2f0fdcc6cc1..6ceb05577538 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -190,6 +190,9 @@ def FeatureMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true",
def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">;
+def FeatureLongCalls : SubtargetFeature<"long-calls", "UseLongCalls", "true",
+ "Disable use of the jal instruction">;
+
//===----------------------------------------------------------------------===//
// Mips processors supported.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index a6ec9fb2e598..20319f85696c 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -364,6 +364,18 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
+ if (!(Subtarget.hasDSP() && Subtarget.hasMips32r2())) {
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ }
+
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+
// Operations not directly supported by Mips.
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
@@ -469,6 +481,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::AssertZext);
setTargetDAGCombine(ISD::SHL);
@@ -923,14 +936,127 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
}
}
+static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG,
+ const MipsSubtarget &Subtarget) {
+ // ROOTNode must have a multiplication as an operand for the match to be
+ // successful.
+ if (ROOTNode->getOperand(0).getOpcode() != ISD::MUL &&
+ ROOTNode->getOperand(1).getOpcode() != ISD::MUL)
+ return SDValue();
+
+ // We don't handle vector types here.
+ if (ROOTNode->getValueType(0).isVector())
+ return SDValue();
+
+  // For MIPS64, madd / msub instructions are inefficient to use with 64-bit
+ // arithmetic. E.g.
+ // (add (mul a b) c) =>
+ // let res = (madd (mthi (drotr c 32))x(mtlo c) a b) in
+ // MIPS64: (or (dsll (mfhi res) 32) (dsrl (dsll (mflo res) 32) 32)
+ // or
+ // MIPS64R2: (dins (mflo res) (mfhi res) 32 32)
+ //
+ // The overhead of setting up the Hi/Lo registers and reassembling the
+  // result makes this a dubious optimization for MIPS64. The core of the
+ // problem is that Hi/Lo contain the upper and lower 32 bits of the
+ // operand and result.
+ //
+ // It requires a chain of 4 add/mul for MIPS64R2 to get better code
+ // density than doing it naively, 5 for MIPS64. Additionally, using
+ // madd/msub on MIPS64 requires the operands actually be 32 bit sign
+ // extended operands, not true 64 bit values.
+ //
+ // FIXME: For the moment, disable this completely for MIPS64.
+ if (Subtarget.hasMips64())
+ return SDValue();
+
+ SDValue Mult = ROOTNode->getOperand(0).getOpcode() == ISD::MUL
+ ? ROOTNode->getOperand(0)
+ : ROOTNode->getOperand(1);
+
+ SDValue AddOperand = ROOTNode->getOperand(0).getOpcode() == ISD::MUL
+ ? ROOTNode->getOperand(1)
+ : ROOTNode->getOperand(0);
+
+ // Transform this to a MADD only if the user of this node is the add.
+ // If there are other users of the mul, this function returns here.
+ if (!Mult.hasOneUse())
+ return SDValue();
+
+ // maddu and madd are unusual instructions in that on MIPS64 bits 63..31
+ // must be in canonical form, i.e. sign extended. For MIPS32, the operands
+ // of the multiply must have 32 or more sign bits, otherwise we cannot
+ // perform this optimization. We have to check this here as we're performing
+ // this optimization pre-legalization.
+ SDValue MultLHS = Mult->getOperand(0);
+ SDValue MultRHS = Mult->getOperand(1);
+
+ bool IsSigned = MultLHS->getOpcode() == ISD::SIGN_EXTEND &&
+ MultRHS->getOpcode() == ISD::SIGN_EXTEND;
+ bool IsUnsigned = MultLHS->getOpcode() == ISD::ZERO_EXTEND &&
+ MultRHS->getOpcode() == ISD::ZERO_EXTEND;
+
+ if (!IsSigned && !IsUnsigned)
+ return SDValue();
+
+ // Initialize accumulator.
+ SDLoc DL(ROOTNode);
+ SDValue TopHalf;
+ SDValue BottomHalf;
+ BottomHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand,
+ CurDAG.getIntPtrConstant(0, DL));
+
+ TopHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand,
+ CurDAG.getIntPtrConstant(1, DL));
+ SDValue ACCIn = CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
+ BottomHalf,
+ TopHalf);
+
+ // Create MipsMAdd(u) / MipsMSub(u) node.
+ bool IsAdd = ROOTNode->getOpcode() == ISD::ADD;
+ unsigned Opcode = IsAdd ? (IsUnsigned ? MipsISD::MAddu : MipsISD::MAdd)
+ : (IsUnsigned ? MipsISD::MSubu : MipsISD::MSub);
+ SDValue MAddOps[3] = {
+ CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(0)),
+ CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(1)), ACCIn};
+ EVT VTs[2] = {MVT::i32, MVT::i32};
+ SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps);
+
+ SDValue ResLo = CurDAG.getNode(MipsISD::MFLO, DL, MVT::i32, MAdd);
+ SDValue ResHi = CurDAG.getNode(MipsISD::MFHI, DL, MVT::i32, MAdd);
+ SDValue Combined =
+ CurDAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResLo, ResHi);
+ return Combined;
+}
+
+static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget &Subtarget) {
+ // (sub v0 (mul v1, v2)) => (msub v1, v2, v0)
+ if (DCI.isBeforeLegalizeOps()) {
+ if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
+ !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64)
+ return performMADD_MSUBCombine(N, DAG, Subtarget);
+
+ return SDValue();
+ }
+
+ return SDValue();
+}
+
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
- // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
+ // (add v0 (mul v1, v2)) => (madd v1, v2, v0)
+ if (DCI.isBeforeLegalizeOps()) {
+ if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
+ !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64)
+ return performMADD_MSUBCombine(N, DAG, Subtarget);
- if (DCI.isBeforeLegalizeOps())
return SDValue();
+ }
+ // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
SDValue Add = N->getOperand(1);
if (Add.getOpcode() != ISD::ADD)
@@ -1058,6 +1184,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return performAssertZextCombine(N, DAG, DCI, Subtarget);
case ISD::SHL:
return performSHLCombine(N, DAG, DCI, Subtarget);
+ case ISD::SUB:
+ return performSUBCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
@@ -3021,6 +3149,20 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT Ty = Callee.getValueType();
bool GlobalOrExternal = false, IsCallReloc = false;
+  // The long-calls feature is ignored in the case of PIC.
+  // While we do not support -mshared / -mno-shared properly,
+  // ignore long-calls in the case of -mabicalls too.
+ if (Subtarget.useLongCalls() && !Subtarget.isABICalls() && !IsPIC) {
+ // Get the address of the callee into a register to prevent
+    // use of the `jal` instruction for the direct call.
+ if (auto *N = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG)
+ : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
+ else if (auto *N = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG)
+ : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
+ }
+
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
if (IsPIC) {
const GlobalValue *Val = G->getGlobal();
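
The extension check in performMADD_MSUBCombine above exists because madd/maddu accumulate a 32x32->64 product into HI/LO, so folding the 64-bit add is only sound when both multiplicands are (sign- or zero-) extended 32-bit values. A minimal standalone sketch of the signed identity (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // What MIPS madd computes: HI:LO += (int64_t)a * (int64_t)b for 32-bit a, b.
    int64_t madd_like(int64_t acc, int32_t a, int32_t b) {
      return acc + int64_t(a) * int64_t(b);
    }

    int main() {
      int32_t a = -12345, b = 67890;
      int64_t c = 1000000000000LL;
      // The combine rewrites (add c, (mul (sext a), (sext b))) into the madd form;
      // both sides agree precisely because a and b fit in 32 signed bits.
      assert(c + int64_t(a) * int64_t(b) == madd_like(c, a, b));
      return 0;
    }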
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 94f3a74be98b..0333fe6520fa 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -443,8 +443,17 @@ let AdditionalPredicates = [NotInMicroMips] in {
}
def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1,
bitconvert>, MFC1_FM<0>;
+def MFC1_D64 : MFC1_FT<"mfc1", GPR32Opnd, FGR64Opnd, II_MFC1>, MFC1_FM<0>,
+ FGR_64 {
+ let DecoderNamespace = "Mips64";
+}
def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1,
bitconvert>, MFC1_FM<4>;
+def MTC1_D64 : MTC1_FT<"mtc1", FGR64Opnd, GPR32Opnd, II_MTC1>, MFC1_FM<4>,
+ FGR_64 {
+ let DecoderNamespace = "Mips64";
+}
+
let AdditionalPredicates = [NotInMicroMips] in {
def MFHC1_D32 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>,
MFC1_FM<3>, ISA_MIPS32R2, FGR_32;
diff --git a/lib/Target/Mips/MipsMTInstrFormats.td b/lib/Target/Mips/MipsMTInstrFormats.td
index edc0981e6278..64bee5bfba18 100644
--- a/lib/Target/Mips/MipsMTInstrFormats.td
+++ b/lib/Target/Mips/MipsMTInstrFormats.td
@@ -35,8 +35,6 @@ class FIELD5<bits<5> Val> {
def FIELD5_1_DMT_EMT : FIELD5<0b00001>;
def FIELD5_2_DMT_EMT : FIELD5<0b01111>;
def FIELD5_1_2_DVPE_EVPE : FIELD5<0b00000>;
-def FIELD5_MFTR : FIELD5<0b01000>;
-def FIELD5_MTTR : FIELD5<0b01100>;
class COP0_MFMC0_MT<FIELD5 Op1, FIELD5 Op2, OPCODE1 sc> : MipsMTInst {
bits<32> Inst;
@@ -52,25 +50,6 @@ class COP0_MFMC0_MT<FIELD5 Op1, FIELD5 Op2, OPCODE1 sc> : MipsMTInst {
let Inst{2-0} = 0b001;
}
-class COP0_MFTTR_MT<FIELD5 Op> : MipsMTInst {
- bits<32> Inst;
-
- bits<5> rt;
- bits<5> rd;
- bits<1> u;
- bits<1> h;
- bits<3> sel;
- let Inst{31-26} = 0b010000; // COP0
- let Inst{25-21} = Op.Value; // MFMC0
- let Inst{20-16} = rt;
- let Inst{15-11} = rd;
- let Inst{10-6} = 0b00000; // rx - currently unsupported.
- let Inst{5} = u;
- let Inst{4} = h;
- let Inst{3} = 0b0;
- let Inst{2-0} = sel;
-}
-
class SPECIAL3_MT_FORK : MipsMTInst {
bits<32> Inst;
diff --git a/lib/Target/Mips/MipsMTInstrInfo.td b/lib/Target/Mips/MipsMTInstrInfo.td
index 72e626cbec40..ab6693f60fd9 100644
--- a/lib/Target/Mips/MipsMTInstrInfo.td
+++ b/lib/Target/Mips/MipsMTInstrInfo.td
@@ -6,13 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file describes the MIPS MT ASE as defined by MD00378 1.12.
-//
-// TODO: Add support for the microMIPS encodings for the MT ASE and add the
-// instruction mappings.
-//
-//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// MIPS MT Instruction Encodings
@@ -34,10 +27,6 @@ class FORK_ENC : SPECIAL3_MT_FORK;
class YIELD_ENC : SPECIAL3_MT_YIELD;
-class MFTR_ENC : COP0_MFTTR_MT<FIELD5_MFTR>;
-
-class MTTR_ENC : COP0_MFTTR_MT<FIELD5_MTTR>;
-
//===----------------------------------------------------------------------===//
// MIPS MT Instruction Descriptions
//===----------------------------------------------------------------------===//
@@ -50,22 +39,6 @@ class MT_1R_DESC_BASE<string instr_asm, InstrItinClass Itin = NoItinerary> {
InstrItinClass Itinerary = Itin;
}
-class MFTR_DESC {
- dag OutOperandList = (outs GPR32Opnd:$rd);
- dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h);
- string AsmString = "mftr\t$rd, $rt, $u, $sel, $h";
- list<dag> Pattern = [];
- InstrItinClass Itinerary = II_MFTR;
-}
-
-class MTTR_DESC {
- dag OutOperandList = (outs GPR32Opnd:$rd);
- dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h);
- string AsmString = "mttr\t$rt, $rd, $u, $sel, $h";
- list<dag> Pattern = [];
- InstrItinClass Itinerary = II_MTTR;
-}
-
class FORK_DESC {
dag OutOperandList = (outs GPR32Opnd:$rs, GPR32Opnd:$rd);
dag InOperandList = (ins GPR32Opnd:$rt);
@@ -106,74 +79,9 @@ let hasSideEffects = 1, isNotDuplicable = 1,
def FORK : FORK_ENC, FORK_DESC, ASE_MT;
def YIELD : YIELD_ENC, YIELD_DESC, ASE_MT;
-
- def MFTR : MFTR_ENC, MFTR_DESC, ASE_MT;
-
- def MTTR : MTTR_ENC, MTTR_DESC, ASE_MT;
}
//===----------------------------------------------------------------------===//
-// MIPS MT Pseudo Instructions - used to support mtfr & mttr aliases.
-//===----------------------------------------------------------------------===//
-def MFTC0 : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins COP0Opnd:$rt,
- uimm3:$sel),
- "mftc0 $rd, $rt, $sel">, ASE_MT;
-
-def MFTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rt,
- uimm3:$sel),
- "mftgpr $rd, $rt">, ASE_MT;
-
-def MFTLO : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac),
- "mftlo $rt, $ac">, ASE_MT;
-
-def MFTHI : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac),
- "mfthi $rt, $ac">, ASE_MT;
-
-def MFTACX : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac),
- "mftacx $rt, $ac">, ASE_MT;
-
-def MFTDSP : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins),
- "mftdsp $rt">, ASE_MT;
-
-def MFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft),
- "mftc1 $rt, $ft">, ASE_MT;
-
-def MFTHC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft),
- "mfthc1 $rt, $ft">, ASE_MT;
-
-def CFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGRCCOpnd:$ft),
- "cftc1 $rt, $ft">, ASE_MT;
-
-
-def MTTC0 : MipsAsmPseudoInst<(outs COP0Opnd:$rd), (ins GPR32Opnd:$rt,
- uimm3:$sel),
- "mttc0 $rt, $rd, $sel">, ASE_MT;
-
-def MTTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins GPR32Opnd:$rd),
- "mttgpr $rd, $rt">, ASE_MT;
-
-def MTTLO : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt),
- "mttlo $rt, $ac">, ASE_MT;
-
-def MTTHI : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt),
- "mtthi $rt, $ac">, ASE_MT;
-
-def MTTACX : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt),
- "mttacx $rt, $ac">, ASE_MT;
-
-def MTTDSP : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rt),
- "mttdsp $rt">, ASE_MT;
-
-def MTTC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt),
- "mttc1 $rt, $ft">, ASE_MT;
-
-def MTTHC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt),
- "mtthc1 $rt, $ft">, ASE_MT;
-
-def CTTC1 : MipsAsmPseudoInst<(outs FGRCCOpnd:$ft), (ins GPR32Opnd:$rt),
- "cttc1 $rt, $ft">, ASE_MT;
-
-//===----------------------------------------------------------------------===//
// MIPS MT Instruction Definitions
//===----------------------------------------------------------------------===//
@@ -187,22 +95,4 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"evpe", (EVPE ZERO), 1>, ASE_MT;
def : MipsInstAlias<"yield $rs", (YIELD ZERO, GPR32Opnd:$rs), 1>, ASE_MT;
-
- def : MipsInstAlias<"mftc0 $rd, $rt", (MFTC0 GPR32Opnd:$rd, COP0Opnd:$rt, 0),
- 1>, ASE_MT;
-
- def : MipsInstAlias<"mftlo $rt", (MFTLO GPR32Opnd:$rt, AC0), 1>, ASE_MT;
-
- def : MipsInstAlias<"mfthi $rt", (MFTHI GPR32Opnd:$rt, AC0), 1>, ASE_MT;
-
- def : MipsInstAlias<"mftacx $rt", (MFTACX GPR32Opnd:$rt, AC0), 1>, ASE_MT;
-
- def : MipsInstAlias<"mttc0 $rd, $rt", (MTTC0 COP0Opnd:$rt, GPR32Opnd:$rd, 0),
- 1>, ASE_MT;
-
- def : MipsInstAlias<"mttlo $rt", (MTTLO AC0, GPR32Opnd:$rt), 1>, ASE_MT;
-
- def : MipsInstAlias<"mtthi $rt", (MTTHI AC0, GPR32Opnd:$rt), 1>, ASE_MT;
-
- def : MipsInstAlias<"mttacx $rt", (MTTACX AC0, GPR32Opnd:$rt), 1>, ASE_MT;
}
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 49ae6dd4cd39..4be26dd25dc0 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -245,46 +245,64 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
}
}
-void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
- SDValue CmpLHS, const SDLoc &DL,
- SDNode *Node) const {
- unsigned Opc = InFlag.getOpcode(); (void)Opc;
-
- assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
- (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
- "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
-
- unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu;
- if (Subtarget->isGP64bit()) {
- SLTuOp = Mips::SLTu64;
- ADDuOp = Mips::DADDu;
- }
-
- SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const {
+ SDValue InFlag = Node->getOperand(2);
+ unsigned Opc = InFlag.getOpcode();
SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
- SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops);
-
- if (Subtarget->isGP64bit()) {
- // On 64-bit targets, sltu produces an i64 but our backend currently says
- // that SLTu64 produces an i32. We need to fix this in the long run but for
- // now, just make the DAG type-correct by asserting the upper bits are zero.
- Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT,
- CurDAG->getTargetConstant(0, DL, VT),
- SDValue(Carry, 0),
- CurDAG->getTargetConstant(Mips::sub_32, DL,
- VT));
+ // In the base case, we can rely on the carry bit from the addsc
+ // instruction.
+ if (Opc == ISD::ADDC) {
+ SDValue Ops[3] = {LHS, RHS, InFlag};
+ CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops);
+ return;
}
- // Generate a second addition only if we know that RHS is not a
- // constant-zero node.
- SDNode *AddCarry = Carry;
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
- if (!C || C->getZExtValue())
- AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);
+ assert(Opc == ISD::ADDE && "ISD::ADDE not in a chain of ADDE nodes!");
+
+ // The more complex case is when there is a chain of ISD::ADDE nodes like:
+ // (adde (adde (adde (addc a b) c) d) e).
+ //
+ // The addwc instruction does not write to the carry bit, instead it writes
+ // to bit 20 of the dsp control register. To match this series of nodes, each
+ // intermediate adde node must be expanded to write the carry bit before the
+ // addition.
+
+ // Start by reading the overflow field for addsc and moving the value to the
+ // carry field. The usage of 1 here with MipsISD::RDDSP / Mips::WRDSP
+ // corresponds to reading/writing the entire control register to/from a GPR.
+
+ SDValue CstOne = CurDAG->getTargetConstant(1, DL, MVT::i32);
+
+ SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32);
+
+ SDNode *DSPCtrlField =
+ CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag);
+
+ SDNode *Carry = CurDAG->getMachineNode(
+ Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne);
- CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0));
+ SDValue Ops[4] = {SDValue(DSPCtrlField, 0),
+ CurDAG->getTargetConstant(6, DL, MVT::i32), CstOne,
+ SDValue(Carry, 0)};
+ SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops);
+
+  // My reading of the MIPS DSP 3.01 specification isn't as clear as I
+ // would like about whether bit 20 always gets overwritten by addwc.
+ // Hence take an extremely conservative view and presume it's sticky. We
+ // therefore need to clear it.
+
+ SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+ SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)};
+ SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps);
+
+ SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue,
+ SDValue(DSPCtrlFinal, 0), CstOne);
+
+ SDValue Operands[3] = {LHS, RHS, SDValue(WrDSP, 0)};
+ CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Operands);
}
/// Match frameindex
@@ -765,19 +783,8 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
switch(Opcode) {
default: break;
- case ISD::SUBE: {
- SDValue InFlag = Node->getOperand(2);
- unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu;
- selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node);
- return true;
- }
-
case ISD::ADDE: {
- if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
- break;
- SDValue InFlag = Node->getOperand(2);
- unsigned Opc = Subtarget->isGP64bit() ? Mips::DADDu : Mips::ADDu;
- selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node);
+ selectAddE(Node, DL);
return true;
}
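
The ADDC/ADDE chains that selectAddE handles are what type legalization produces for additions wider than the native word; a minimal sketch of the carry propagation being modelled (illustrative only, not part of the patch):

    #include <cstdint>

    // 64-bit addition built from 32-bit halves: the low-word add may carry
    // into the high-word add. Legalization models the low add as ISD::ADDC
    // and the high add as ISD::ADDE, which is the node selected above.
    uint64_t add64(uint32_t alo, uint32_t ahi, uint32_t blo, uint32_t bhi) {
      uint32_t lo = alo + blo;          // ADDC: low word plus carry-out
      uint32_t carry = lo < alo;        // carry out of the low addition
      uint32_t hi = ahi + bhi + carry;  // ADDE: consumes the carry
      return (uint64_t(hi) << 32) | lo;
    }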
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index f89a350cab04..6f38289c5a45 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -41,8 +41,7 @@ private:
const SDLoc &dl, EVT Ty, bool HasLo,
bool HasHi);
- void selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS,
- const SDLoc &DL, SDNode *Node) const;
+ void selectAddE(SDNode *Node, const SDLoc &DL) const;
bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const;
bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset,
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 06a97b9d123e..72b2738bfac4 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -179,8 +179,6 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
- setTargetDAGCombine(ISD::ADDE);
- setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::MUL);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -421,163 +419,6 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
return MipsTargetLowering::LowerOperation(Op, DAG);
}
-// selectMADD -
-// Transforms a subgraph in CurDAG if the following pattern is found:
-// (addc multLo, Lo0), (adde multHi, Hi0),
-// where,
-// multHi/Lo: product of multiplication
-// Lo0: initial value of Lo register
-// Hi0: initial value of Hi register
-// Return true if pattern matching was successful.
-static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
- // ADDENode's second operand must be a flag output of an ADDC node in order
- // for the matching to be successful.
- SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
-
- if (ADDCNode->getOpcode() != ISD::ADDC)
- return false;
-
- SDValue MultHi = ADDENode->getOperand(0);
- SDValue MultLo = ADDCNode->getOperand(0);
- SDNode *MultNode = MultHi.getNode();
- unsigned MultOpc = MultHi.getOpcode();
-
- // MultHi and MultLo must be generated by the same node,
- if (MultLo.getNode() != MultNode)
- return false;
-
- // and it must be a multiplication.
- if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
- return false;
-
- // MultLo amd MultHi must be the first and second output of MultNode
- // respectively.
- if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
- return false;
-
- // Transform this to a MADD only if ADDENode and ADDCNode are the only users
- // of the values of MultNode, in which case MultNode will be removed in later
- // phases.
- // If there exist users other than ADDENode or ADDCNode, this function returns
- // here, which will result in MultNode being mapped to a single MULT
- // instruction node rather than a pair of MULT and MADD instructions being
- // produced.
- if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
- return false;
-
- SDLoc DL(ADDENode);
-
- // Initialize accumulator.
- SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
- ADDCNode->getOperand(1),
- ADDENode->getOperand(1));
-
- // create MipsMAdd(u) node
- MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
-
- SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
- MultNode->getOperand(0),// Factor 0
- MultNode->getOperand(1),// Factor 1
- ACCIn);
-
- // replace uses of adde and addc here
- if (!SDValue(ADDCNode, 0).use_empty()) {
- SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
- }
- if (!SDValue(ADDENode, 0).use_empty()) {
- SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
- }
-
- return true;
-}
-
-// selectMSUB -
-// Transforms a subgraph in CurDAG if the following pattern is found:
-// (addc Lo0, multLo), (sube Hi0, multHi),
-// where,
-// multHi/Lo: product of multiplication
-// Lo0: initial value of Lo register
-// Hi0: initial value of Hi register
-// Return true if pattern matching was successful.
-static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
- // SUBENode's second operand must be a flag output of an SUBC node in order
- // for the matching to be successful.
- SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
-
- if (SUBCNode->getOpcode() != ISD::SUBC)
- return false;
-
- SDValue MultHi = SUBENode->getOperand(1);
- SDValue MultLo = SUBCNode->getOperand(1);
- SDNode *MultNode = MultHi.getNode();
- unsigned MultOpc = MultHi.getOpcode();
-
- // MultHi and MultLo must be generated by the same node,
- if (MultLo.getNode() != MultNode)
- return false;
-
- // and it must be a multiplication.
- if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
- return false;
-
- // MultLo and MultHi must be the first and second output of MultNode
- // respectively.
- if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
- return false;
-
- // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
- // of the values of MultNode, in which case MultNode will be removed in later
- // phases.
- // If there exist users other than SUBENode or SUBCNode, this function returns
- // here, which will result in MultNode being mapped to a single MULT
- // instruction node rather than a pair of MULT and MSUB instructions being
- // produced.
- if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
- return false;
-
- SDLoc DL(SUBENode);
-
- // Initialize accumulator.
- SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
- SUBCNode->getOperand(0),
- SUBENode->getOperand(0));
-
- // create MipsSub(u) node
- MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
-
- SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue,
- MultNode->getOperand(0),// Factor 0
- MultNode->getOperand(1),// Factor 1
- ACCIn);
-
- // replace uses of sube and subc here
- if (!SDValue(SUBCNode, 0).use_empty()) {
- SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
- }
- if (!SDValue(SUBENode, 0).use_empty()) {
- SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
- }
-
- return true;
-}
-
-static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget &Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
- N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG))
- return SDValue(N, 0);
-
- return SDValue();
-}
-
// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
@@ -820,19 +661,6 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget &Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 &&
- selectMSUB(N, &DAG))
- return SDValue(N, 0);
-
- return SDValue();
-}
-
static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT,
EVT ShiftTy, SelectionDAG &DAG) {
// Clear the upper (64 - VT.sizeInBits) bits.
@@ -1110,16 +938,12 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
SDValue Val;
switch (N->getOpcode()) {
- case ISD::ADDE:
- return performADDECombine(N, DAG, DCI, Subtarget);
case ISD::AND:
Val = performANDCombine(N, DAG, DCI, Subtarget);
break;
case ISD::OR:
Val = performORCombine(N, DAG, DCI, Subtarget);
break;
- case ISD::SUBE:
- return performSUBECombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
return performMULCombine(N, DAG, DCI, this);
case ISD::SHL:
@@ -3596,9 +3420,17 @@ MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
: (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
: &Mips::GPR64RegClass);
const bool UsingMips32 = RC == &Mips::GPR32RegClass;
- unsigned Rs = RegInfo.createVirtualRegister(RC);
+ unsigned Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
+ if (!UsingMips32) {
+ unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
+ BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
+ .addImm(0)
+ .addReg(Rs)
+ .addImm(Mips::sub_32);
+ Rs = Tmp;
+ }
BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
.addReg(Rs)
.addReg(Rt)
@@ -3649,6 +3481,12 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
for (unsigned i = 1; i < MI.getNumOperands(); i++)
MIB.add(MI.getOperand(i));
+ if (!UsingMips32) {
+ unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32);
+ Rt = Tmp;
+ }
+
BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
MI.eraseFromParent();
@@ -3716,6 +3554,7 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
+ bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
@@ -3726,7 +3565,9 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
const TargetRegisterClass *GPRRC =
IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
- unsigned MFC1Opc = IsFGR64onMips64 ? Mips::DMFC1 : Mips::MFC1;
+ unsigned MFC1Opc = IsFGR64onMips64
+ ? Mips::DMFC1
+ : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;
// Perform the register class copy as mentioned above.
@@ -3735,7 +3576,7 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
unsigned WPHI = Wtemp;
- if (!Subtarget.hasMips64() && IsFGR64) {
+ if (IsFGR64onMips32) {
unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
@@ -3829,7 +3670,9 @@ MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
const TargetRegisterClass *GPRRC =
IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
- unsigned MTC1Opc = IsFGR64onMips64 ? Mips::DMTC1 : Mips::MTC1;
+ unsigned MTC1Opc = IsFGR64onMips64
+ ? Mips::DMTC1
+ : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index 8ec55ab6284d..c2947bb44ef5 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -226,7 +226,6 @@ def II_MFC1 : InstrItinClass;
def II_MFHC1 : InstrItinClass;
def II_MFC2 : InstrItinClass;
def II_MFHI_MFLO : InstrItinClass; // mfhi and mflo
-def II_MFTR : InstrItinClass;
def II_MOD : InstrItinClass;
def II_MODU : InstrItinClass;
def II_MOVE : InstrItinClass;
@@ -256,7 +255,6 @@ def II_MTC1 : InstrItinClass;
def II_MTHC1 : InstrItinClass;
def II_MTC2 : InstrItinClass;
def II_MTHI_MTLO : InstrItinClass; // mthi and mtlo
-def II_MTTR : InstrItinClass;
def II_MUL : InstrItinClass;
def II_MUH : InstrItinClass;
def II_MUHU : InstrItinClass;
@@ -666,14 +664,12 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_MFHC0 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MFC1 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MFC2 , [InstrStage<2, [ALU]>]>,
- InstrItinData<II_MFTR , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTC0 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTHC0 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTC1 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTC2 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MFHC1 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTHC1 , [InstrStage<2, [ALU]>]>,
- InstrItinData<II_MTTR , [InstrStage<2, [ALU]>]>,
InstrItinData<II_CACHE , [InstrStage<1, [ALU]>]>,
InstrItinData<II_PREF , [InstrStage<1, [ALU]>]>,
InstrItinData<II_CACHEE , [InstrStage<1, [ALU]>]>,
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 7619e7b08612..cce3b8c4c8d1 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -152,6 +152,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// HasMT -- support MT ASE.
bool HasMT;
+ // Disable use of the `jal` instruction.
+ bool UseLongCalls = false;
+
InstrItineraryData InstrItins;
// We can override the determination of whether we are in mips16 mode
@@ -269,6 +272,8 @@ public:
bool useSoftFloat() const { return IsSoftFloat; }
+ bool useLongCalls() const { return UseLongCalls; }
+
bool enableLongBranchPass() const {
return hasStandardEncoding() || allowMixed16_32();
}
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index af24838665e1..7d9f99ce071e 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -119,9 +119,6 @@ public:
SMLoc IDLoc, const MCSubtargetInfo *STI);
void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm,
SMLoc IDLoc, const MCSubtargetInfo *STI);
- void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0,
- int16_t Imm1, int16_t Imm2, SMLoc IDLoc,
- const MCSubtargetInfo *STI);
void emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit,
const MCSubtargetInfo *STI);
void emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index a00b56af0490..92c8c224b71b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -271,7 +271,8 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12;
const MCOperand &MO = MI.getOperand(OpNo);
- assert(MO.isImm());
+ assert(MO.isImm() && !(MO.getImm() % 16) &&
+ "Expecting an immediate that is a multiple of 16");
return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
}
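A quick numeric sketch of the packing getMemRIX16Encoding performs once the assert holds. The field widths follow the shift and mask in the hunk above; the helper name and the driver are made up for illustration:

#include <cassert>
#include <cstdint>
#include <cstdio>

// The DQ-form displacement is stored scaled down by 16 in the low 12 bits,
// with the register field in the bits above it (the << 12 in the code above).
// Standalone sketch, not the MC encoder itself.
static uint32_t encodeMemRIX16(uint32_t RegBits, int32_t Disp) {
  assert(Disp % 16 == 0 && "displacement must be a multiple of 16");
  return (RegBits << 12) | ((static_cast<uint32_t>(Disp) >> 4) & 0xFFF);
}

int main() {
  // Register field 3 with displacement 48: 48/16 = 3 goes in bits 11-0,
  // giving 0x3003.
  printf("0x%04x\n", encodeMemRIX16(3, 48));
  return 0;
}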
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 3aaf7ef2c2a0..901539b682ba 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -178,7 +178,7 @@ namespace {
/// a base register plus a signed 16-bit displacement [r+imm].
bool SelectAddrImm(SDValue N, SDValue &Disp,
SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
}
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
@@ -211,7 +211,11 @@ namespace {
/// a base register plus a signed 16-bit displacement that is a multiple of 4.
/// Suitable for use by STD and friends.
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
+ }
+
+ bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
}
// Select an address into a single register.
@@ -305,6 +309,7 @@ private:
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
+ bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
void transferMemOperands(SDNode *N, SDNode *Result);
};
@@ -2999,6 +3004,25 @@ SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare,
return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
}
+/// Does this node represent a load/store whose address can be represented as a
+/// register plus an immediate that is a multiple of \p Val?
+bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
+ LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
+ StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
+ SDValue AddrOp;
+ if (LDN)
+ AddrOp = LDN->getOperand(1);
+ else if (STN)
+ AddrOp = STN->getOperand(2);
+
+ short Imm = 0;
+ if (AddrOp.getOpcode() == ISD::ADD)
+ return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
+
+ // If the address comes from the outside, the offset will be zero.
+ return AddrOp.getOpcode() == ISD::CopyFromReg;
+}
+
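A standalone sketch (plain C++, illustrative names only) of the decision isOffsetMultipleOf feeds into the quadwOffsetLoad/Store PatFrags added further down in PPCInstrInfo.td: reg+imm addresses whose immediate is a multiple of 16 (or a bare register, i.e. offset 0) may use the DQ-form LXV/STXV patterns, while everything else falls back to the indexed LXVX/STXVX forms.

#include <cstdio>
#include <initializer_list>

// Mirrors the predicate: an address is "DQ-form friendly" if its displacement
// is congruent to 0 mod Val (Val == 16 for LXV/STXV); a plain register address
// counts as displacement 0. Sketch only, not SelectionDAG code.
static bool offsetIsMultipleOf(long Disp, unsigned Val) {
  return Disp % Val == 0;
}

int main() {
  for (long Disp : {0L, 8L, 16L, 24L, 32L, 48L})
    printf("disp %2ld -> %s\n", Disp,
           offsetIsMultipleOf(Disp, 16) ? "DQ-form (lxv/stxv)"
                                        : "X-form  (lxvx/stxvx)");
  return 0;
}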
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 0e069ec1665f..b3a3c73f6df0 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2130,12 +2130,12 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
-/// represented as reg+reg. If Aligned is true, only accept displacements
-/// suitable for STD and friends, i.e. multiples of 4.
+/// represented as reg+reg. If \p Alignment is non-zero, only accept
+/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
SDValue &Base,
SelectionDAG &DAG,
- bool Aligned) const {
+ unsigned Alignment) const {
// FIXME dl should come from parent load or store, not from address
SDLoc dl(N);
// If this can be more profitably realized as r+r, fail.
@@ -2145,7 +2145,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
if (N.getOpcode() == ISD::ADD) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!Aligned || (imm & 3) == 0)) {
+ (!Alignment || (imm % Alignment) == 0)) {
Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
@@ -2169,7 +2169,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
} else if (N.getOpcode() == ISD::OR) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!Aligned || (imm & 3) == 0)) {
+ (!Alignment || (imm % Alignment) == 0)) {
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
@@ -2196,7 +2196,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If this address fits entirely in a 16-bit sext immediate field, codegen
// this as "d, 0"
int16_t Imm;
- if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
+ if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
@@ -2206,7 +2206,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// Handle 32-bit sext immediates with LIS + addr mode.
if ((CN->getValueType(0) == MVT::i32 ||
(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
- (!Aligned || (CN->getZExtValue() & 3) == 0)) {
+ (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
@@ -2321,14 +2321,14 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
// LDU/STU can only handle immediates that are a multiple of 4.
if (VT != MVT::i64) {
- if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
return false;
} else {
// LDU/STU need an address with at least 4-byte alignment.
if (Alignment < 4)
return false;
- if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
return false;
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 821927d3b157..49d7d8220af1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -616,7 +616,7 @@ namespace llvm {
/// is not better represented as reg+reg. If Alignment is non-zero, only accept
/// displacements that are multiples of that value.
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
- SelectionDAG &DAG, bool Aligned) const;
+ SelectionDAG &DAG, unsigned Alignment) const;
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 13b4f9ab962d..e74ba38c351f 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1635,8 +1635,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (equalityOnly) {
// We need to check the uses of the condition register in order to reject
// non-equality comparisons.
- for (MachineRegisterInfo::use_instr_iterator I =MRI->use_instr_begin(CRReg),
- IE = MRI->use_instr_end(); I != IE; ++I) {
+ for (MachineRegisterInfo::use_instr_iterator
+ I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
+ I != IE; ++I) {
MachineInstr *UseMI = &*I;
if (UseMI->getOpcode() == PPC::BCC) {
unsigned Pred = UseMI->getOperand(0).getImm();
@@ -1658,8 +1659,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
++I) {
bool FoundUse = false;
- for (MachineRegisterInfo::use_instr_iterator J =MRI->use_instr_begin(CRReg),
- JE = MRI->use_instr_end(); J != JE; ++J)
+ for (MachineRegisterInfo::use_instr_iterator
+ J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
+ J != JE; ++J)
if (&*J == &*I) {
FoundUse = true;
break;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 6d9f55206b6a..dd7fc2659102 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -405,6 +405,25 @@ def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
+// This is a somewhat weaker condition than actually checking for 16-byte
+// alignment. It is simply checking that the displacement can be represented
+// as an immediate that is a multiple of 16 (i.e. the requirements for DQ-Form
+// instructions).
+def quadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isOffsetMultipleOf(N, 16);
+}]>;
+def quadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isOffsetMultipleOf(N, 16);
+}]>;
+def nonQuadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return !isOffsetMultipleOf(N, 16);
+}]>;
+def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return !isOffsetMultipleOf(N, 16);
+}]>;
+
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@@ -815,7 +834,8 @@ def pred : Operand<OtherVT> {
def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
-def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
+def iqaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
// The address in a single register. This is used with the SjLj
// pseudo-instructions.
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 43635a8919e2..942e8b392b82 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2606,37 +2606,41 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
} // IsLittleEndian, HasP9Vector
// D-Form Load/Store
- def : Pat<(v4i32 (load iaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v4f32 (load iaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2i64 (load iaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2f64 (load iaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddr:$src)), (LXV memrix16:$src)>;
-
- def : Pat<(store v4f32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(store v4i32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(store v2f64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(store v2i64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddr:$dst),
+ def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>;
+
+ def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst),
(STXV $rS, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddr:$dst),
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst),
(STXV $rS, memrix16:$dst)>;
- def : Pat<(v2f64 (load xaddr:$src)), (LXVX xaddr:$src)>;
- def : Pat<(v2i64 (load xaddr:$src)), (LXVX xaddr:$src)>;
- def : Pat<(v4f32 (load xaddr:$src)), (LXVX xaddr:$src)>;
- def : Pat<(v4i32 (load xaddr:$src)), (LXVX xaddr:$src)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xaddr:$src)), (LXVX xaddr:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xaddr:$src)), (LXVX xaddr:$src)>;
- def : Pat<(store v2f64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
- def : Pat<(store v2i64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
- def : Pat<(store v4f32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
- def : Pat<(store v4i32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xaddr:$dst),
- (STXVX $rS, xaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xaddr:$dst),
- (STXVX $rS, xaddr:$dst)>;
+ def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
(v4i32 (LXVWSX xoaddr:$src))>;
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
@@ -2788,21 +2792,21 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
let isPseudo = 1 in {
def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
"#DFLOADf32",
- [(set f32:$XT, (load iaddr:$src))]>;
+ [(set f32:$XT, (load ixaddr:$src))]>;
def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src),
"#DFLOADf64",
- [(set f64:$XT, (load iaddr:$src))]>;
+ [(set f64:$XT, (load ixaddr:$src))]>;
def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst),
"#DFSTOREf32",
- [(store f32:$XT, iaddr:$dst)]>;
+ [(store f32:$XT, ixaddr:$dst)]>;
def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
"#DFSTOREf64",
- [(store f64:$XT, iaddr:$dst)]>;
+ [(store f64:$XT, ixaddr:$dst)]>;
}
- def : Pat<(f64 (extloadf32 iaddr:$src)),
- (COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>;
- def : Pat<(f32 (fpround (extloadf32 iaddr:$src))),
- (f32 (DFLOADf32 iaddr:$src))>;
+ def : Pat<(f64 (extloadf32 ixaddr:$src)),
+ (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
+ def : Pat<(f32 (fpround (extloadf32 ixaddr:$src))),
+ (f32 (DFLOADf32 ixaddr:$src))>;
} // end HasP9Vector, AddedComplexity
// Integer extend helper dags 32 -> 64
@@ -2881,13 +2885,13 @@ def FltToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
}
def FltToLongLoadP9 {
- dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddr:$A)))));
+ dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A)))));
}
def FltToULongLoad {
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
}
def FltToULongLoadP9 {
- dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddr:$A)))));
+ dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A)))));
}
def FltToLong {
dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A))));
@@ -2911,13 +2915,13 @@ def DblToIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
}
def DblToIntLoadP9 {
- dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddr:$A)))));
+ dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A)))));
}
def DblToUIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
}
def DblToUIntLoadP9 {
- dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddr:$A)))));
+ dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A)))));
}
def DblToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
@@ -3088,17 +3092,17 @@ let AddedComplexity = 400 in {
(v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
+ (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
+ (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
- (DFLOADf32 iaddr:$A),
+ (DFLOADf32 ixaddr:$A),
VSFRC)), 0))>;
def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
- (DFLOADf32 iaddr:$A),
+ (DFLOADf32 ixaddr:$A),
VSFRC)), 0))>;
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 8af7f7e98117..9207165c46a6 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -754,19 +754,31 @@ bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
return false;
}
-// Figure out if the offset in the instruction must be a multiple of 4.
-// This is true for instructions like "STD".
-static bool usesIXAddr(const MachineInstr &MI) {
+// If the offset must be a multiple of some value, return what that value is.
+static unsigned offsetMinAlign(const MachineInstr &MI) {
unsigned OpC = MI.getOpcode();
switch (OpC) {
default:
- return false;
+ return 1;
case PPC::LWA:
case PPC::LWA_32:
case PPC::LD:
+ case PPC::LDU:
case PPC::STD:
- return true;
+ case PPC::STDU:
+ case PPC::DFLOADf32:
+ case PPC::DFLOADf64:
+ case PPC::DFSTOREf32:
+ case PPC::DFSTOREf64:
+ case PPC::LXSD:
+ case PPC::LXSSP:
+ case PPC::STXSD:
+ case PPC::STXSSP:
+ return 4;
+ case PPC::LXV:
+ case PPC::STXV:
+ return 16;
}
}
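The callers below combine this with the 16-bit range check as isInt<16>(Offset) && (Offset % offsetMinAlign(MI)) == 0. A small self-contained sketch of that combined test (local helper names, not the PPC code itself):

#include <cstdio>

// An offset is usable in the instruction's immediate form only if it fits in a
// signed 16-bit field and honours the per-opcode alignment: 1 for most
// opcodes, 4 for DS-form (LD/STD/LWA/...), 16 for DQ-form (LXV/STXV).
static bool isInt16(long V) { return V >= -32768 && V <= 32767; }

static bool offsetLegalForImmForm(long Offset, unsigned MinAlign) {
  return isInt16(Offset) && (Offset % MinAlign) == 0;
}

int main() {
  printf("%d\n", offsetLegalForImmForm(40, 4));    // 1: fine for STD
  printf("%d\n", offsetLegalForImmForm(40, 16));   // 0: LXV needs a multiple of 16
  printf("%d\n", offsetLegalForImmForm(70000, 1)); // 0: does not fit in 16 bits
  return 0;
}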
@@ -852,9 +864,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum).ChangeToRegister(
FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false);
- // Figure out if the offset in the instruction is shifted right two bits.
- bool isIXAddr = usesIXAddr(MI);
-
// If the instruction is not present in ImmToIdxMap, then it has no immediate
// form (and must be r+r).
bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP &&
@@ -883,7 +892,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// happen in invalid code.
assert(OpC != PPC::DBG_VALUE &&
"This should be handled in a target-independent way");
- if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) ||
+ if (!noImmForm && ((isInt<16>(Offset) &&
+ ((Offset % offsetMinAlign(MI)) == 0)) ||
OpC == TargetOpcode::STACKMAP ||
OpC == TargetOpcode::PATCHPOINT)) {
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -1076,5 +1086,5 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
MI->getOpcode() == TargetOpcode::STACKMAP ||
MI->getOpcode() == TargetOpcode::PATCHPOINT ||
- (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
+ (isInt<16>(Offset) && (Offset % offsetMinAlign(*MI)) == 0);
}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 2dc3828334ac..be705507b534 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -41,6 +41,8 @@ public:
~PPCTargetMachine() override;
const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
+ // The no argument getSubtargetImpl, while it exists on some targets, is
+ // deprecated and should not be used.
const PPCSubtarget *getSubtargetImpl() const = delete;
// Pass Pipeline Configuration
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index 11004c5a952f..91cab00b2b65 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -20,6 +20,10 @@ include "llvm/Target/Target.td"
// SPARC Subtarget features.
//
+def FeatureSoftMulDiv
+ : SubtargetFeature<"soft-mul-div", "UseSoftMulDiv", "true",
+ "Use software emulation for integer multiply and divide">;
+
def FeatureV9
: SubtargetFeature<"v9", "IsV9", "true",
"Enable SPARC-V9 instructions">;
@@ -75,7 +79,7 @@ class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
def : Proc<"generic", []>;
-def : Proc<"v7", []>;
+def : Proc<"v7", [FeatureSoftMulDiv]>;
def : Proc<"v8", []>;
def : Proc<"supersparc", []>;
def : Proc<"sparclite", []>;
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 9e7e3c6b705a..6767a59a9757 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1689,6 +1689,19 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::MUL, MVT::i32, Expand);
+ if (Subtarget->useSoftMulDiv()) {
+ // .umul works for both signed and unsigned
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setLibcallName(RTLIB::MUL_I32, ".umul");
+
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setLibcallName(RTLIB::SDIV_I32, ".div");
+
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setLibcallName(RTLIB::UDIV_I32, ".udiv");
+ }
+
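Why a single .umul libcall can back ISD::MUL for both signednesses, as the comment above notes: for a fixed-width multiply the low 32 bits of the product do not depend on whether the operands are read as signed or unsigned; only the high half differs, and SMUL_LOHI/UMUL_LOHI are both expanded here. A throwaway check, not Sparc lowering code:

#include <cstdint>
#include <cstdio>

int main() {
  int32_t A = -7, B = 3;
  uint32_t UA = static_cast<uint32_t>(A), UB = static_cast<uint32_t>(B);

  // Low 32 bits of the product under both interpretations.
  uint32_t SignedLow   = static_cast<uint32_t>(A * B);
  uint32_t UnsignedLow = UA * UB;

  printf("signed low = 0x%08x, unsigned low = 0x%08x, %s\n",
         SignedLow, UnsignedLow,
         SignedLow == UnsignedLow ? "equal" : "different");
  return 0;
}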
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index ae45c8be6752..3194ad4aeb6b 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -27,6 +27,9 @@ def Is32Bit : Predicate<"!Subtarget->is64Bit()">;
// True when generating 64-bit code. This also implies HasV9.
def Is64Bit : Predicate<"Subtarget->is64Bit()">;
+def UseSoftMulDiv : Predicate<"Subtarget->useSoftMulDiv()">,
+ AssemblerPredicate<"FeatureSoftMulDiv">;
+
// HasV9 - This predicate is true when the target processor supports V9
// instructions. Note that the machine may be running in 32-bit mode.
def HasV9 : Predicate<"Subtarget->isV9()">,
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index 43ddef3cc96e..daac56add87c 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -28,6 +28,7 @@ void SparcSubtarget::anchor() { }
SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
+ UseSoftMulDiv = false;
IsV9 = false;
IsLeon = false;
V8DeprecatedInsts = false;
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index fa42da425ff2..d18139984b87 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -32,6 +32,7 @@ class StringRef;
class SparcSubtarget : public SparcGenSubtargetInfo {
Triple TargetTriple;
virtual void anchor();
+ bool UseSoftMulDiv;
bool IsV9;
bool IsLeon;
bool V8DeprecatedInsts;
@@ -76,6 +77,7 @@ public:
bool enableMachineScheduler() const override;
+ bool useSoftMulDiv() const { return UseSoftMulDiv; }
bool isV9() const { return IsV9; }
bool isLeon() const { return IsLeon; }
bool isVIS() const { return IsVIS; }
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index ee23692ad1db..33680789ee08 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -275,6 +275,10 @@ public:
SMLoc getEndLoc() const override { return EndLoc; }
void print(raw_ostream &OS) const override;
+ /// getLocRange - Get the range between the first and last token of this
+ /// operand.
+ SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
+
// Used by the TableGen code to add particular types of operand
// to an instruction.
void addRegOperands(MCInst &Inst, unsigned N) const {
@@ -1164,6 +1168,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
return false;
}
+std::string SystemZMnemonicSpellCheck(StringRef S, uint64_t FBS);
+
bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -1209,8 +1215,13 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(ErrorLoc, "invalid operand for instruction");
}
- case Match_MnemonicFail:
- return Error(IDLoc, "invalid instruction");
+ case Match_MnemonicFail: {
+ uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ std::string Suggestion = SystemZMnemonicSpellCheck(
+ ((SystemZOperand &)*Operands[0]).getToken(), FBS);
+ return Error(IDLoc, "invalid instruction" + Suggestion,
+ ((SystemZOperand &)*Operands[0]).getLocRange());
+ }
}
llvm_unreachable("Unexpected match type");
diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt
index 6f8431db7b11..9b8b141fd52a 100644
--- a/lib/Target/SystemZ/LLVMBuild.txt
+++ b/lib/Target/SystemZ/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = SystemZCodeGen
parent = SystemZ
-required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/SystemZFeatures.td b/lib/Target/SystemZ/SystemZFeatures.td
index c5faa0d62881..fda9c30fe3fc 100644
--- a/lib/Target/SystemZ/SystemZFeatures.td
+++ b/lib/Target/SystemZ/SystemZFeatures.td
@@ -189,6 +189,58 @@ def Arch11NewFeatures : SystemZFeatureList<[
//===----------------------------------------------------------------------===//
//
+// New features added in the Twelfth Edition of the z/Architecture
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureMiscellaneousExtensions2 : SystemZFeature<
+ "miscellaneous-extensions-2", "MiscellaneousExtensions2",
+ "Assume that the miscellaneous-extensions facility 2 is installed"
+>;
+
+def FeatureGuardedStorage : SystemZFeature<
+ "guarded-storage", "GuardedStorage",
+ "Assume that the guarded-storage facility is installed"
+>;
+
+def FeatureMessageSecurityAssist7 : SystemZFeature<
+ "message-security-assist-extension7", "MessageSecurityAssist7",
+ "Assume that the message-security-assist extension facility 7 is installed"
+>;
+
+def FeatureMessageSecurityAssist8 : SystemZFeature<
+ "message-security-assist-extension8", "MessageSecurityAssist8",
+ "Assume that the message-security-assist extension facility 8 is installed"
+>;
+
+def FeatureVectorEnhancements1 : SystemZFeature<
+ "vector-enhancements-1", "VectorEnhancements1",
+ "Assume that the vector enhancements facility 1 is installed"
+>;
+def FeatureNoVectorEnhancements1 : SystemZMissingFeature<"VectorEnhancements1">;
+
+def FeatureVectorPackedDecimal : SystemZFeature<
+ "vector-packed-decimal", "VectorPackedDecimal",
+ "Assume that the vector packed decimal facility is installed"
+>;
+
+def FeatureInsertReferenceBitsMultiple : SystemZFeature<
+ "insert-reference-bits-multiple", "InsertReferenceBitsMultiple",
+ "Assume that the insert-reference-bits-multiple facility is installed"
+>;
+
+def Arch12NewFeatures : SystemZFeatureList<[
+ FeatureMiscellaneousExtensions2,
+ FeatureGuardedStorage,
+ FeatureMessageSecurityAssist7,
+ FeatureMessageSecurityAssist8,
+ FeatureVectorEnhancements1,
+ FeatureVectorPackedDecimal,
+ FeatureInsertReferenceBitsMultiple
+]>;
+
+//===----------------------------------------------------------------------===//
+//
// Cumulative supported and unsupported feature sets
//
//===----------------------------------------------------------------------===//
@@ -201,9 +253,13 @@ def Arch10SupportedFeatures
: SystemZFeatureAdd<Arch9SupportedFeatures.List, Arch10NewFeatures.List>;
def Arch11SupportedFeatures
: SystemZFeatureAdd<Arch10SupportedFeatures.List, Arch11NewFeatures.List>;
+def Arch12SupportedFeatures
+ : SystemZFeatureAdd<Arch11SupportedFeatures.List, Arch12NewFeatures.List>;
-def Arch11UnsupportedFeatures
+def Arch12UnsupportedFeatures
: SystemZFeatureList<[]>;
+def Arch11UnsupportedFeatures
+ : SystemZFeatureAdd<Arch12UnsupportedFeatures.List, Arch12NewFeatures.List>;
def Arch10UnsupportedFeatures
: SystemZFeatureAdd<Arch11UnsupportedFeatures.List, Arch11NewFeatures.List>;
def Arch9UnsupportedFeatures
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2801141cd951..2d916d2e1521 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -101,7 +101,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
}
- addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
+ if (Subtarget.hasVectorEnhancements1())
+ addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
+ else
+ addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
if (Subtarget.hasVector()) {
addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
@@ -316,7 +319,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::AND, VT, Legal);
setOperationAction(ISD::OR, VT, Legal);
setOperationAction(ISD::XOR, VT, Legal);
- setOperationAction(ISD::CTPOP, VT, Custom);
+ if (Subtarget.hasVectorEnhancements1())
+ setOperationAction(ISD::CTPOP, VT, Legal);
+ else
+ setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Legal);
setOperationAction(ISD::CTLZ, VT, Legal);
@@ -414,10 +420,60 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
}
+ // The vector enhancements facility 1 has instructions for these.
+ if (Subtarget.hasVectorEnhancements1()) {
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FABS, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::f64, Legal);
+
+ setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
+
+ setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::f128, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::f128, Legal);
+ }
+
// We have fused multiply-addition for f32 and f64 but not f128.
setOperationAction(ISD::FMA, MVT::f32, Legal);
setOperationAction(ISD::FMA, MVT::f64, Legal);
- setOperationAction(ISD::FMA, MVT::f128, Expand);
+ if (Subtarget.hasVectorEnhancements1())
+ setOperationAction(ISD::FMA, MVT::f128, Legal);
+ else
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+
+ // We don't have a copysign instruction on vector registers.
+ if (Subtarget.hasVectorEnhancements1())
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
// Needed so that we don't try to implement f128 constant loads using
// a load-and-extend of a f80 constant (in cases where the constant
@@ -425,6 +481,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
+ // We don't have an extending load instruction on vector registers.
+ if (Subtarget.hasVectorEnhancements1()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
+ }
+
// Floating-point truncation and stores need to be done separately.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
@@ -489,7 +551,7 @@ bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
case MVT::f64:
return true;
case MVT::f128:
- return false;
+ return Subtarget.hasVectorEnhancements1();
default:
break;
}
@@ -1462,21 +1524,25 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
return true;
case Intrinsic::s390_vfcedbs:
+ case Intrinsic::s390_vfcesbs:
Opcode = SystemZISD::VFCMPES;
CCValid = SystemZ::CCMASK_VCMP;
return true;
case Intrinsic::s390_vfchdbs:
+ case Intrinsic::s390_vfchsbs:
Opcode = SystemZISD::VFCMPHS;
CCValid = SystemZ::CCMASK_VCMP;
return true;
case Intrinsic::s390_vfchedbs:
+ case Intrinsic::s390_vfchesbs:
Opcode = SystemZISD::VFCMPHES;
CCValid = SystemZ::CCMASK_VCMP;
return true;
case Intrinsic::s390_vftcidb:
+ case Intrinsic::s390_vftcisb:
Opcode = SystemZISD::VFTCI;
CCValid = SystemZ::CCMASK_VCMP;
return true;
@@ -2316,11 +2382,15 @@ static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
// producing a result of type VT.
-static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL,
- EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
- // There is no hardware support for v4f32, so extend the vector into
- // two v2f64s and compare those.
- if (CmpOp0.getValueType() == MVT::v4f32) {
+SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
+ const SDLoc &DL, EVT VT,
+ SDValue CmpOp0,
+ SDValue CmpOp1) const {
+ // There is no hardware support for v4f32 (unless we have the vector
+ // enhancements facility 1), so extend the vector into two v2f64s
+ // and compare those.
+ if (CmpOp0.getValueType() == MVT::v4f32 &&
+ !Subtarget.hasVectorEnhancements1()) {
SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
@@ -2334,9 +2404,11 @@ static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL,
// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
// an integer mask of type VT.
-static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
- ISD::CondCode CC, SDValue CmpOp0,
- SDValue CmpOp1) {
+SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
+ const SDLoc &DL, EVT VT,
+ ISD::CondCode CC,
+ SDValue CmpOp0,
+ SDValue CmpOp1) const {
bool IsFP = CmpOp0.getValueType().isFloatingPoint();
bool Invert = false;
SDValue Cmp;
@@ -2960,6 +3032,12 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
// We define this so that it can be used for constant division.
lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
Op.getOperand(1), Ops[1], Ops[0]);
+ else if (Subtarget.hasMiscellaneousExtensions2())
+ // SystemZISD::SMUL_LOHI returns the low result in the odd register and
+ // the high result in the even register. ISD::SMUL_LOHI is defined to
+ // return the low half first, so the results are in reverse order.
+ lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
+ Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
else {
// Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
//
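The Ops[1]/Ops[0] ordering in the new hasMiscellaneousExtensions2 branch of lowerSMUL_LOHI follows from the register-pair convention its comment describes: the machine result arrives as {even = high, odd = low}, while ISD::SMUL_LOHI's results are {0 = low, 1 = high}. A small stand-in (plain C++, 32-bit halves for brevity, invented helper name) showing the swap:

#include <cstdint>
#include <cstdio>
#include <utility>

// Model of an even/odd register pair after a widening multiply: first = even
// register (high half), second = odd register (low half). Not SystemZ code.
static std::pair<uint32_t, uint32_t> mulEvenOdd(int32_t A, int32_t B) {
  int64_t P = static_cast<int64_t>(A) * B;
  return {static_cast<uint32_t>(static_cast<uint64_t>(P) >> 32), // even: high
          static_cast<uint32_t>(P)};                             // odd: low
}

int main() {
  uint32_t Ops[2];
  std::pair<uint32_t, uint32_t> EvenOdd = mulEvenOdd(-3, 1 << 20);

  Ops[1] = EvenOdd.first;  // ISD result 1 = high half (even register)
  Ops[0] = EvenOdd.second; // ISD result 0 = low half  (odd register)

  printf("low = 0x%08x, high = 0x%08x\n", Ops[0], Ops[1]);
  return 0;
}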
@@ -4658,6 +4736,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
OPCODE(POPCNT);
+ OPCODE(SMUL_LOHI);
OPCODE(UMUL_LOHI);
OPCODE(SDIVREM);
OPCODE(UDIVREM);
@@ -6118,6 +6197,7 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::SelectF32:
case SystemZ::SelectF64:
case SystemZ::SelectF128:
+ case SystemZ::SelectVR128:
return emitSelect(MI, MBB, 0);
case SystemZ::CondStore8Mux:
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 6c9c404816f0..abe8b7233e60 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -88,6 +88,7 @@ enum NodeType : unsigned {
// Wrappers around the ISD opcodes of the same name. The output is GR128.
// Input operands may be GR64 or GR32, depending on the instruction.
+ SMUL_LOHI,
UMUL_LOHI,
SDIVREM,
UDIVREM,
@@ -479,6 +480,12 @@ private:
const SystemZSubtarget &Subtarget;
// Implement LowerOperation for individual opcodes.
+ SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
+ const SDLoc &DL, EVT VT,
+ SDValue CmpOp0, SDValue CmpOp1) const;
+ SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL,
+ EVT VT, ISD::CondCode CC,
+ SDValue CmpOp0, SDValue CmpOp1) const;
SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 10172bd45203..02aeaadad0d9 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -12,9 +12,12 @@
//===----------------------------------------------------------------------===//
// C's ?: operator for floating-point operands.
-def SelectF32 : SelectWrapper<FP32>;
-def SelectF64 : SelectWrapper<FP64>;
-def SelectF128 : SelectWrapper<FP128>;
+def SelectF32 : SelectWrapper<f32, FP32>;
+def SelectF64 : SelectWrapper<f64, FP64>;
+let Predicates = [FeatureNoVectorEnhancements1] in
+ def SelectF128 : SelectWrapper<f128, FP128>;
+let Predicates = [FeatureVectorEnhancements1] in
+ def SelectVR128 : SelectWrapper<f128, VR128>;
defm CondStoreF32 : CondStores<FP32, nonvolatile_store,
nonvolatile_load, bdxaddr20only>;
@@ -69,8 +72,9 @@ let Defs = [CC], usesCustomInserter = 1 in {
let Predicates = [FeatureVector] in {
defm : CompareZeroFP<LTEBRCompare_VecPseudo, FP32>;
defm : CompareZeroFP<LTDBRCompare_VecPseudo, FP64>;
- defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>;
}
+let Predicates = [FeatureVector, FeatureNoVectorEnhancements1] in
+ defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>;
// Moves between 64-bit integer and floating-point registers.
def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>;
@@ -83,8 +87,12 @@ let isCodeGenOnly = 1 in {
}
// The sign of an FP128 is in the high register.
-def : Pat<(fcopysign FP32:$src1, FP128:$src2),
- (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+let Predicates = [FeatureNoVectorEnhancements1] in
+ def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 FP128:$src2)))),
+ (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+let Predicates = [FeatureVectorEnhancements1] in
+ def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))),
+ (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>;
// fcopysign with an FP64 result.
let isCodeGenOnly = 1 in
@@ -92,8 +100,12 @@ let isCodeGenOnly = 1 in
def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>;
// The sign of an FP128 is in the high register.
-def : Pat<(fcopysign FP64:$src1, FP128:$src2),
- (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+let Predicates = [FeatureNoVectorEnhancements1] in
+ def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 FP128:$src2)))),
+ (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+let Predicates = [FeatureVectorEnhancements1] in
+ def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))),
+ (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_r64))>;
// fcopysign with an FP128 result. Use "upper" as the high half and leave
// the low half as-is.
@@ -101,12 +113,14 @@ class CopySign128<RegisterOperand cls, dag upper>
: Pat<(fcopysign FP128:$src1, cls:$src2),
(INSERT_SUBREG FP128:$src1, upper, subreg_h64)>;
-def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64),
- FP32:$src2)>;
-def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
- FP64:$src2)>;
-def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
- (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+let Predicates = [FeatureNoVectorEnhancements1] in {
+ def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ FP32:$src2)>;
+ def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ FP64:$src2)>;
+ def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
+}
defm LoadStoreF32 : MVCLoadStore<load, f32, MVCSequence, 4>;
defm LoadStoreF64 : MVCLoadStore<load, f64, MVCSequence, 8>;
@@ -166,20 +180,32 @@ def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>,
def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
Requires<[FeatureFPExtension]>;
-def : Pat<(f32 (fpround FP128:$src)),
- (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
-def : Pat<(f64 (fpround FP128:$src)),
- (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
+let Predicates = [FeatureNoVectorEnhancements1] in {
+ def : Pat<(f32 (fpround FP128:$src)),
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
+ def : Pat<(f64 (fpround FP128:$src)),
+ (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
+}
// Extend register floating-point values to wider representations.
-def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>;
-def LXEBR : UnaryRRE<"lxebr", 0xB306, fpextend, FP128, FP32>;
-def LXDBR : UnaryRRE<"lxdbr", 0xB305, fpextend, FP128, FP64>;
+def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>;
+def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>;
+def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>;
+let Predicates = [FeatureNoVectorEnhancements1] in {
+ def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
+ def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
+}
// Extend memory floating-point values to wider representations.
def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>;
-def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>;
-def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>;
+def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
+def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
+let Predicates = [FeatureNoVectorEnhancements1] in {
+ def : Pat<(f128 (extloadf32 bdxaddr12only:$src)),
+ (LXEB bdxaddr12only:$src)>;
+ def : Pat<(f128 (extloadf64 bdxaddr12only:$src)),
+ (LXDB bdxaddr12only:$src)>;
+}
// Convert a signed integer register value to a floating-point one.
def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>;
@@ -426,16 +452,18 @@ def : Pat<(fmul (f64 (fpextend FP32:$src1)),
// f128 multiplication of two FP64 registers.
def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
-def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
- (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
- FP64:$src1, subreg_h64), FP64:$src2)>;
+let Predicates = [FeatureNoVectorEnhancements1] in
+ def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
+ (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
+ FP64:$src1, subreg_h64), FP64:$src2)>;
// f128 multiplication of an FP64 register and an f64 memory.
def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
-def : Pat<(fmul (f128 (fpextend FP64:$src1)),
- (f128 (extloadf64 bdxaddr12only:$addr))),
- (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
- bdxaddr12only:$addr)>;
+let Predicates = [FeatureNoVectorEnhancements1] in
+ def : Pat<(fmul (f128 (fpextend FP64:$src1)),
+ (f128 (extloadf64 bdxaddr12only:$addr))),
+ (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
+ bdxaddr12only:$addr)>;
// Fused multiply-add.
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>;
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 7620e06ccbc9..033a0a879d37 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -1091,6 +1091,94 @@ class InstVRIe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{7-0} = op{7-0};
}
+class InstVRIf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<8> I4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = 0;
+ let Inst{23-20} = M5;
+ let Inst{19-12} = I4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIg<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<8> I3;
+ bits<8> I4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-24} = I4;
+ let Inst{23-20} = M5;
+ let Inst{19-12} = I3;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIh<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<16> I2;
+ bits<4> I3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = 0;
+ let Inst{31-16} = I2;
+ let Inst{15-12} = I3;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIi<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<4> R2;
+ bits<8> I3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = R2;
+ let Inst{31-24} = 0;
+ let Inst{23-20} = M4;
+ let Inst{19-12} = I3;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
// Depending on the instruction mnemonic, certain bits may be or-ed into
// the M4 value provided as explicit operand. These are passed as m4or.
class InstVRRa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
@@ -1259,6 +1347,67 @@ class InstVRRf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{7-0} = op{7-0};
}
+class InstVRRg<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = 0;
+ let Inst{35-32} = V1{3-0};
+ let Inst{31-12} = 0;
+ let Inst{11} = 0;
+ let Inst{10} = V1{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRRh<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = 0;
+ let Inst{35-32} = V1{3-0};
+ let Inst{31-28} = V2{3-0};
+ let Inst{27-24} = 0;
+ let Inst{23-20} = M3;
+ let Inst{19-12} = 0;
+ let Inst{11} = 0;
+ let Inst{10} = V1{4};
+ let Inst{9} = V2{4};
+ let Inst{8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRRi<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<5> V2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-24} = 0;
+ let Inst{23-20} = M3;
+ let Inst{19-12} = 0;
+ let Inst{11} = 0;
+ let Inst{10} = V2{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
class InstVRSa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
@@ -1321,6 +1470,25 @@ class InstVRSc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{7-0} = op{7-0};
}
+class InstVRSd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<16> BD2;
+ bits<4> R3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = 0;
+ let Inst{35-32} = R3;
+ let Inst{31-16} = BD2;
+ let Inst{15-12} = V1{3-0};
+ let Inst{11-9} = 0;
+ let Inst{8} = V1{4};
+ let Inst{7-0} = op{7-0};
+}
+
class InstVRV<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
@@ -1358,6 +1526,24 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{7-0} = op{7-0};
}
+class InstVSI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<16> BD2;
+ bits<8> I3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-32} = I3;
+ let Inst{31-16} = BD2;
+ let Inst{15-12} = V1{3-0};
+ let Inst{11-9} = 0;
+ let Inst{8} = V1{4};
+ let Inst{7-0} = op{7-0};
+}
+
//===----------------------------------------------------------------------===//
// Instruction classes for .insn directives
//===----------------------------------------------------------------------===//
@@ -1910,6 +2096,25 @@ class FixedCondBranchRX<CondVariant V, string mnemonic, bits<8> opcode>
let M1 = V.ccmask;
}
+class CondBranchRXY<string mnemonic, bits<16> opcode>
+ : InstRXYb<opcode, (outs), (ins cond4:$valid, cond4:$M1, bdxaddr20only:$XBD2),
+ !subst("#", "${M1}", mnemonic)#"\t$XBD2", []> {
+ let CCMaskFirst = 1;
+}
+
+class AsmCondBranchRXY<string mnemonic, bits<16> opcode>
+ : InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2),
+ mnemonic#"\t$M1, $XBD2", []>;
+
+class FixedCondBranchRXY<CondVariant V, string mnemonic, bits<16> opcode,
+ SDPatternOperator operator = null_frag>
+ : InstRXYb<opcode, (outs), (ins bdxaddr20only:$XBD2),
+ !subst("#", V.suffix, mnemonic)#"\t$XBD2",
+ [(operator (load bdxaddr20only:$XBD2))]> {
+ let isAsmParserOnly = V.alternate;
+ let M1 = V.ccmask;
+}
+
class CmpBranchRIEa<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, cond4:$M3),
@@ -2272,6 +2477,24 @@ class StoreLengthVRSb<string mnemonic, bits<16> opcode,
let AccessBytes = bytes;
}
+class StoreLengthVRSd<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, bits<5> bytes>
+ : InstVRSd<opcode, (outs), (ins VR128:$V1, GR32:$R3, bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $R3, $BD2",
+ [(operator VR128:$V1, GR32:$R3, bdaddr12only:$BD2)]> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreLengthVSI<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, bits<5> bytes>
+ : InstVSI<opcode, (outs), (ins VR128:$V1, bdaddr12only:$BD2, imm32zx8:$I3),
+ mnemonic#"\t$V1, $BD2, $I3",
+ [(operator VR128:$V1, imm32zx8:$I3, bdaddr12only:$BD2)]> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
class StoreMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
AddressingMode mode = bdaddr12only>
: InstRSa<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
@@ -2700,6 +2923,11 @@ class SideEffectBinaryRX<string mnemonic, bits<8> opcode,
: InstRXa<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
mnemonic##"\t$R1, $XBD2", []>;
+class SideEffectBinaryRXY<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRXYa<opcode, (outs), (ins cls:$R1, bdxaddr20only:$XBD2),
+ mnemonic##"\t$R1, $XBD2", []>;
+
class SideEffectBinaryRILPC<string mnemonic, bits<12> opcode,
RegisterOperand cls>
: InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2),
@@ -3188,6 +3416,11 @@ class BinaryVRIeFloatGeneric<string mnemonic, bits<16> opcode>
(ins VR128:$V2, imm32zx12:$I3, imm32zx4:$M4, imm32zx4:$M5),
mnemonic#"\t$V1, $V2, $I3, $M4, $M5", []>;
+class BinaryVRIh<string mnemonic, bits<16> opcode>
+ : InstVRIh<opcode, (outs VR128:$V1),
+ (ins imm32zx16:$I2, imm32zx4:$I3),
+ mnemonic#"\t$V1, $I2, $I3", []>;
+
class BinaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0>
: InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx4:$M5),
@@ -3316,6 +3549,10 @@ class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator,
mnemonic#"\t$V1, $R2, $R3",
[(set tr.op:$V1, (tr.vt (operator GR64:$R2, GR64:$R3)))]>;
+class BinaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $V2, $M3", []>;
+
class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type>
: InstVRSa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, shift12only:$BD2),
@@ -3353,6 +3590,15 @@ class BinaryVRScGeneric<string mnemonic, bits<16> opcode>
(ins VR128:$V3, shift12only:$BD2, imm32zx4: $M4),
mnemonic#"\t$R1, $V3, $BD2, $M4", []>;
+class BinaryVRSd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ bits<5> bytes>
+ : InstVRSd<opcode, (outs VR128:$V1), (ins GR32:$R3, bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $R3, $BD2",
+ [(set VR128:$V1, (operator GR32:$R3, bdaddr12only:$BD2))]> {
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<5> bytes>
: InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
@@ -3398,6 +3644,15 @@ class StoreBinaryRSL<string mnemonic, bits<16> opcode, RegisterOperand cls>
let mayStore = 1;
}
+class BinaryVSI<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ bits<5> bytes>
+ : InstVSI<opcode, (outs VR128:$V1), (ins bdaddr12only:$BD2, imm32zx8:$I3),
+ mnemonic#"\t$V1, $BD2, $I3",
+ [(set VR128:$V1, (operator imm32zx8:$I3, bdaddr12only:$BD2))]> {
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
Immediate index>
: InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3),
@@ -3625,6 +3880,12 @@ class CompareVRRaFloatGeneric<string mnemonic, bits<16> opcode>
let M5 = 0;
}
+class CompareVRRh<string mnemonic, bits<16> opcode>
+ : InstVRRh<opcode, (outs), (ins VR128:$V1, VR128:$V2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $V2, $M3", []> {
+ let isCompare = 1;
+}
+
class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls>
: InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
@@ -3639,6 +3900,10 @@ class TestRSL<string mnemonic, bits<16> opcode>
let mayLoad = 1;
}
+class TestVRRg<string mnemonic, bits<16> opcode>
+ : InstVRRg<opcode, (outs), (ins VR128:$V1),
+ mnemonic#"\t$V1", []>;
+
class SideEffectTernarySSc<string mnemonic, bits<8> opcode>
: InstSSc<opcode, (outs), (ins bdladdr12onlylen4:$BDL1,
shift12only:$BD2, imm32zx4:$I3),
@@ -3842,6 +4107,11 @@ class TernaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M5 = type;
}
+class TernaryVRIi<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstVRIi<opcode, (outs VR128:$V1),
+ (ins cls:$R2, imm32zx8:$I3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $R2, $I3, $M4", []>;
+
class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or>
: InstVRRa<opcode, (outs tr1.op:$V1),
@@ -3914,6 +4184,25 @@ class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M6 = 0;
}
+class TernaryVRRcFloat<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
+ bits<4> type = 0, bits<4> m5 = 0>
+ : InstVRRc<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $M6",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ imm32zx4:$M6)))]> {
+ let M4 = type;
+ let M5 = m5;
+}
+
+class TernaryVRRcFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRc<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5,
+ imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5, $M6", []>;
+
class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type = 0>
: InstVRRd<opcode, (outs tr1.op:$V1),
@@ -4019,20 +4308,38 @@ class QuaternaryVRIdGeneric<string mnemonic, bits<16> opcode>
let DisableEncoding = "$V1src";
}
+class QuaternaryVRIf<string mnemonic, bits<16> opcode>
+ : InstVRIf<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3,
+ imm32zx8:$I4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>;
+
+class QuaternaryVRIg<string mnemonic, bits<16> opcode>
+ : InstVRIg<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, imm32zx8:$I3,
+ imm32zx8:$I4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $I3, $I4, $M5", []>;
+
class QuaternaryVRRd<string mnemonic, bits<16> opcode,
SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
- bits<4> type, SDPatternOperator m6mask, bits<4> m6or>
+ TypedReg tr3, TypedReg tr4, bits<4> type,
+ SDPatternOperator m6mask = imm32zx4, bits<4> m6or = 0>
: InstVRRd<opcode, (outs tr1.op:$V1),
- (ins tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, m6mask:$M6),
+ (ins tr2.op:$V2, tr3.op:$V3, tr4.op:$V4, m6mask:$M6),
mnemonic#"\t$V1, $V2, $V3, $V4, $M6",
[(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
- (tr2.vt tr2.op:$V3),
- (tr2.vt tr2.op:$V4),
+ (tr3.vt tr3.op:$V3),
+ (tr4.vt tr4.op:$V4),
m6mask:$M6)))],
m6or> {
let M5 = type;
}
+class QuaternaryVRRdGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRd<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, VR128:$V4, imm32zx4:$M5, imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>;
+
// Declare a pair of instructions, one which sets CC and one which doesn't.
// The CC-setting form ends with "S" and sets the low bit of M6.
// Also create aliases to make use of M6 operand optional in assembler.
@@ -4041,13 +4348,15 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
SDPatternOperator operator_cc,
TypedReg tr1, TypedReg tr2, bits<4> type,
bits<4> modifier = 0> {
- def "" : QuaternaryVRRd<mnemonic, opcode, operator, tr1, tr2, type,
+ def "" : QuaternaryVRRd<mnemonic, opcode, operator,
+ tr1, tr2, tr2, tr2, type,
imm32zx4even, !and (modifier, 14)>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr2.op:$V4, 0)>;
let Defs = [CC] in
- def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc,
+ tr1, tr2, tr2, tr2, type,
imm32zx4even, !add (!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4",
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
@@ -4055,10 +4364,7 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
}
multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> {
- def "" : InstVRRd<opcode, (outs VR128:$V1),
- (ins VR128:$V2, VR128:$V3, VR128:$V4,
- imm32zx4:$M5, imm32zx4:$M6),
- mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>;
+ def "" : QuaternaryVRRdGeneric<mnemonic, opcode>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
VR128:$V4, imm32zx4:$M5, 0)>;
@@ -4366,10 +4672,10 @@ class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2>
// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is
// the value of the PSW's 2-bit condition code field.
-class SelectWrapper<RegisterOperand cls>
+class SelectWrapper<ValueType vt, RegisterOperand cls>
: Pseudo<(outs cls:$dst),
(ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc),
- [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2,
+ [(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2,
imm32zx4:$valid, imm32zx4:$cc))]> {
let usesCustomInserter = 1;
// Although the instructions used by these nodes do not in themselves
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 66a5ff12be46..4533f4fdf21a 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -869,6 +869,37 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ // Move 128-bit floating-point values between VR128 and FP128.
+ if (SystemZ::VR128BitRegClass.contains(DestReg) &&
+ SystemZ::FP128BitRegClass.contains(SrcReg)) {
+ unsigned SrcRegHi =
+ RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_h64),
+ SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ unsigned SrcRegLo =
+ RI.getMatchingSuperReg(RI.getSubReg(SrcReg, SystemZ::subreg_l64),
+ SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+
+ BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg)
+ .addReg(SrcRegHi, getKillRegState(KillSrc))
+ .addReg(SrcRegLo, getKillRegState(KillSrc));
+ return;
+ }
+ if (SystemZ::FP128BitRegClass.contains(DestReg) &&
+ SystemZ::VR128BitRegClass.contains(SrcReg)) {
+ unsigned DestRegHi =
+ RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_h64),
+ SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+ unsigned DestRegLo =
+ RI.getMatchingSuperReg(RI.getSubReg(DestReg, SystemZ::subreg_l64),
+ SystemZ::subreg_r64, &SystemZ::VR128BitRegClass);
+
+ if (DestRegHi != SrcReg)
+ copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false);
+ BuildMI(MBB, MBBI, DL, get(SystemZ::VREPG), DestRegLo)
+ .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1);
+ return;
+ }
+
// Everything else needs only one instruction.
unsigned Opcode;
if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
@@ -1434,6 +1465,7 @@ SystemZII::Branch
SystemZInstrInfo::getBranchInfo(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case SystemZ::BR:
+ case SystemZ::BI:
case SystemZ::J:
case SystemZ::JG:
return SystemZII::Branch(SystemZII::BranchNormal, SystemZ::CCMASK_ANY,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 4569be7602e4..f64c0d15ef83 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -48,6 +48,8 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in {
let isIndirectBranch = 1 in {
def BC : CondBranchRX<"b#", 0x47>;
def BCR : CondBranchRR<"b#r", 0x07>;
+ def BIC : CondBranchRXY<"bi#", 0xe347>,
+ Requires<[FeatureMiscellaneousExtensions2]>;
}
}
@@ -58,6 +60,8 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in {
let isIndirectBranch = 1 in {
def BCAsm : AsmCondBranchRX<"bc", 0x47>;
def BCRAsm : AsmCondBranchRR<"bcr", 0x07>;
+ def BICAsm : AsmCondBranchRXY<"bic", 0xe347>,
+ Requires<[FeatureMiscellaneousExtensions2]>;
}
// Define AsmParser extended mnemonics for each general condition-code mask
@@ -69,6 +73,8 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in {
let isIndirectBranch = 1 in {
def BAsm#V : FixedCondBranchRX <CV<V>, "b#", 0x47>;
def BRAsm#V : FixedCondBranchRR <CV<V>, "b#r", 0x07>;
+ def BIAsm#V : FixedCondBranchRXY<CV<V>, "bi#", 0xe347>,
+ Requires<[FeatureMiscellaneousExtensions2]>;
}
}
}
@@ -81,6 +87,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isIndirectBranch = 1 in {
def B : FixedCondBranchRX<CondAlways, "b", 0x47>;
def BR : FixedCondBranchRR<CondAlways, "br", 0x07, brind>;
+ def BI : FixedCondBranchRXY<CondAlways, "bi", 0xe347, brind>,
+ Requires<[FeatureMiscellaneousExtensions2]>;
}
}
@@ -316,9 +324,9 @@ let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in {
// Select instructions
//===----------------------------------------------------------------------===//
-def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>;
-def Select32 : SelectWrapper<GR32>;
-def Select64 : SelectWrapper<GR64>;
+def Select32Mux : SelectWrapper<i32, GRX32>, Requires<[FeatureHighWord]>;
+def Select32 : SelectWrapper<i32, GR32>;
+def Select64 : SelectWrapper<i64, GR64>;
// We don't define 32-bit Mux stores if we don't have STOCFH, because the
// low-only STOC should then always be used if possible.
@@ -921,6 +929,8 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
// Addition of memory.
defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, asextloadi16, 2>;
defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>;
+ def AGH : BinaryRXY<"agh", 0xE338, add, GR64, asextloadi16, 2>,
+ Requires<[FeatureMiscellaneousExtensions2]>;
def AGF : BinaryRXY<"agf", 0xE318, add, GR64, asextloadi32, 4>;
def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>;
@@ -1006,6 +1016,8 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
// Subtraction of memory.
defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>;
defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>;
+ def SGH : BinaryRXY<"sgh", 0xE339, sub, GR64, asextloadi16, 2>,
+ Requires<[FeatureMiscellaneousExtensions2]>;
def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, asextloadi32, 4>;
def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>;
}
@@ -1207,6 +1219,15 @@ defm : RMWIByte<xor, bdaddr20pair, XIY>;
// Multiplication
//===----------------------------------------------------------------------===//
+// Multiplication of a register, setting the condition code. We prefer these
+// over MS(G)R if available, even though we cannot use the condition code,
+// since they are three-operand instructions.
+let Predicates = [FeatureMiscellaneousExtensions2],
+ Defs = [CC], isCommutable = 1 in {
+ def MSRKC : BinaryRRFa<"msrkc", 0xB9FD, mul, GR32, GR32, GR32>;
+ def MSGRKC : BinaryRRFa<"msgrkc", 0xB9ED, mul, GR64, GR64, GR64>;
+}
+
// Multiplication of a register.
let isCommutable = 1 in {
def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>;
@@ -1226,21 +1247,37 @@ def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>;
// Multiplication of memory.
defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, asextloadi16, 2>;
defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>;
+def MGH : BinaryRXY<"mgh", 0xE33C, mul, GR64, asextloadi16, 2>,
+ Requires<[FeatureMiscellaneousExtensions2]>;
def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>;
def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>;
+// Multiplication of memory, setting the condition code.
+let Predicates = [FeatureMiscellaneousExtensions2], Defs = [CC] in {
+ def MSC : BinaryRXY<"msc", 0xE353, null_frag, GR32, load, 4>;
+ def MSGC : BinaryRXY<"msgc", 0xE383, null_frag, GR64, load, 8>;
+}
+
// Multiplication of a register, producing two results.
-def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>;
+def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>;
+def MGRK : BinaryRRFa<"mgrk", 0xB9EC, null_frag, GR128, GR64, GR64>,
+ Requires<[FeatureMiscellaneousExtensions2]>;
def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>;
def MLGR : BinaryRRE<"mlgr", 0xB986, null_frag, GR128, GR64>;
+def : Pat<(z_smul_lohi GR64:$src1, GR64:$src2),
+ (MGRK GR64:$src1, GR64:$src2)>;
def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2),
(MLGR (AEXT128 GR64:$src1), GR64:$src2)>;
// Multiplication of memory, producing two results.
def M : BinaryRX <"m", 0x5C, null_frag, GR128, load, 4>;
def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, load, 4>;
+def MG : BinaryRXY<"mg", 0xE384, null_frag, GR128, load, 8>,
+ Requires<[FeatureMiscellaneousExtensions2]>;
def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>;
def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, load, 8>;
+def : Pat<(z_smul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))),
+ (MG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
def : Pat<(z_umul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))),
(MLG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>;
@@ -1765,8 +1802,29 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in {
GR128, GR128, GR128>;
def PCC : SideEffectInherentRRE<"pcc", 0xB92C>;
}
+
let Predicates = [FeatureMessageSecurityAssist5] in
- def PPNO : SideEffectBinaryMemMemRRE<"ppno", 0xB93C, GR128, GR128>;
+ def PPNO : SideEffectBinaryMemMemRRE<"ppno", 0xB93C, GR128, GR128>;
+ let Predicates = [FeatureMessageSecurityAssist7], isAsmParserOnly = 1 in
+ def PRNO : SideEffectBinaryMemMemRRE<"prno", 0xB93C, GR128, GR128>;
+
+ let Predicates = [FeatureMessageSecurityAssist8] in
+ def KMA : SideEffectTernaryMemMemMemRRFb<"kma", 0xB929,
+ GR128, GR128, GR128>;
+}
+
+//===----------------------------------------------------------------------===//
+// Guarded storage
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureGuardedStorage] in {
+ def LGG : UnaryRXY<"lgg", 0xE34C, null_frag, GR64, 8>;
+ def LLGFSG : UnaryRXY<"llgfsg", 0xE348, null_frag, GR64, 4>;
+
+ let mayLoad = 1 in
+ def LGSC : SideEffectBinaryRXY<"lgsc", 0xE34D, GR64>;
+ let mayStore = 1 in
+ def STGSC : SideEffectBinaryRXY<"stgsc", 0xE349, GR64>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrSystem.td b/lib/Target/SystemZ/SystemZInstrSystem.td
index a9803c2d83e9..0112ebf1eb10 100644
--- a/lib/Target/SystemZ/SystemZInstrSystem.td
+++ b/lib/Target/SystemZ/SystemZInstrSystem.td
@@ -126,6 +126,10 @@ let hasSideEffects = 1, Defs = [CC] in
let Predicates = [FeatureResetReferenceBitsMultiple], hasSideEffects = 1 in
def RRBM : UnaryRRE<"rrbm", 0xB9AE, null_frag, GR64, GR64>;
+// Insert reference bits multiple.
+let Predicates = [FeatureInsertReferenceBitsMultiple], hasSideEffects = 1 in
+ def IRBM : UnaryRRE<"irbm", 0xB9AC, null_frag, GR64, GR64>;
+
// Perform frame management function.
let hasSideEffects = 1 in
def PFMF : SideEffectBinaryMemRRE<"pfmf", 0xB9AF, GR32, GR64>;
diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td
index 0158fe6aec08..c9a02d9c8082 100644
--- a/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -14,7 +14,7 @@
let Predicates = [FeatureVector] in {
// Register move.
def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
- def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>;
+ def VLR32 : UnaryAliasVRR<null_frag, v32sb, v32sb>;
def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
// Load GR from VR element.
@@ -141,7 +141,7 @@ let Predicates = [FeatureVector] in {
// LEY and LDY offer full 20-bit displacement fields. It's often better
// to use those instructions rather than force a 20-bit displacement
// into a GPR temporary.
- def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>;
+ def VL32 : UnaryAliasVRX<load, v32sb, bdxaddr12pair>;
def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
// Load logical element and zero.
@@ -154,6 +154,11 @@ let Predicates = [FeatureVector] in {
(VLLEZF bdxaddr12only:$addr)>;
def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
(VLLEZG bdxaddr12only:$addr)>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VLLEZLF : UnaryVRX<"vllezlf", 0xE704, z_vllezli32, v128f, 4, 6>;
+ def : Pat<(v4f32 (z_vllezlf32 bdxaddr12only:$addr)),
+ (VLLEZLF bdxaddr12only:$addr)>;
+ }
// Load element.
def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>;
@@ -170,6 +175,13 @@ let Predicates = [FeatureVector] in {
def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>;
}
+let Predicates = [FeatureVectorPackedDecimal] in {
+ // Load rightmost with length. The number of loaded bytes is only known
+ // at run time.
+ def VLRL : BinaryVSI<"vlrl", 0xE635, int_s390_vlrl, 0>;
+ def VLRLR : BinaryVRSd<"vlrlr", 0xE637, int_s390_vlrl, 0>;
+}
+
// Use replicating loads if we're inserting a single element into an
// undefined vector. This avoids a false dependency on the previous
// register contents.
@@ -219,7 +231,7 @@ let Predicates = [FeatureVector] in {
// STEY and STDY offer full 20-bit displacement fields. It's often better
// to use those instructions rather than force a 20-bit displacement
// into a GPR temporary.
- def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>;
+ def VST32 : StoreAliasVRX<store, v32sb, bdxaddr12pair>;
def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
// Scatter element.
@@ -227,6 +239,13 @@ let Predicates = [FeatureVector] in {
def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>;
}
+let Predicates = [FeatureVectorPackedDecimal] in {
+ // Store rightmost with length. The number of stored bytes is only known
+ // at run time.
+ def VSTRL : StoreLengthVSI<"vstrl", 0xE63D, int_s390_vstrl, 0>;
+ def VSTRLR : StoreLengthVRSd<"vstrlr", 0xE63F, int_s390_vstrl, 0>;
+}
+
//===----------------------------------------------------------------------===//
// Selects and permutes
//===----------------------------------------------------------------------===//
@@ -256,6 +275,10 @@ let Predicates = [FeatureVector] in {
// Permute doubleword immediate.
def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>;
+ // Bit Permute.
+ let Predicates = [FeatureVectorEnhancements1] in
+ def VBPERM : BinaryVRRc<"vbperm", 0xE785, int_s390_vbperm, v128g, v128b>;
+
// Replicate.
def VREP: BinaryVRIcGeneric<"vrep", 0xE74D>;
def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>;
@@ -424,6 +447,10 @@ let Predicates = [FeatureVector] in {
def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>;
def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>;
+ // Not exclusive or.
+ let Predicates = [FeatureVectorEnhancements1] in
+ def VNX : BinaryVRRc<"vnx", 0xE76C, null_frag, v128any, v128any>;
+
// Exclusive or.
def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
@@ -567,6 +594,17 @@ let Predicates = [FeatureVector] in {
def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
+ // Multiply sum logical.
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VMSL : QuaternaryVRRdGeneric<"vmsl", 0xE7B8>;
+ def VMSLG : QuaternaryVRRd<"vmslg", 0xE7B8, int_s390_vmslg,
+ v128q, v128g, v128g, v128q, 3>;
+ }
+
+ // Nand.
+ let Predicates = [FeatureVectorEnhancements1] in
+ def VNN : BinaryVRRc<"vnn", 0xE76E, null_frag, v128any, v128any>;
+
// Nor.
def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;
def : InstAlias<"vnot\t$V1, $V2", (VNO VR128:$V1, VR128:$V2, VR128:$V2), 0>;
@@ -574,9 +612,19 @@ let Predicates = [FeatureVector] in {
// Or.
def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>;
+ // Or with complement.
+ let Predicates = [FeatureVectorEnhancements1] in
+ def VOC : BinaryVRRc<"voc", 0xE76F, null_frag, v128any, v128any>;
+
// Population count.
def VPOPCT : UnaryVRRaGeneric<"vpopct", 0xE750>;
def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VPOPCTB : UnaryVRRa<"vpopctb", 0xE750, ctpop, v128b, v128b, 0>;
+ def VPOPCTH : UnaryVRRa<"vpopcth", 0xE750, ctpop, v128h, v128h, 1>;
+ def VPOPCTF : UnaryVRRa<"vpopctf", 0xE750, ctpop, v128f, v128f, 2>;
+ def VPOPCTG : UnaryVRRa<"vpopctg", 0xE750, ctpop, v128g, v128g, 3>;
+ }
// Element rotate left logical (with vector shift amount).
def VERLLV : BinaryVRRcGeneric<"verllv", 0xE773>;
@@ -724,6 +772,14 @@ multiclass BitwiseVectorOps<ValueType type> {
(VNO VR128:$x, VR128:$y)>;
def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>;
}
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def : Pat<(type (z_vnot (xor VR128:$x, VR128:$y))),
+ (VNX VR128:$x, VR128:$y)>;
+ def : Pat<(type (z_vnot (and VR128:$x, VR128:$y))),
+ (VNN VR128:$x, VR128:$y)>;
+ def : Pat<(type (or VR128:$x, (z_vnot VR128:$y))),
+ (VOC VR128:$x, VR128:$y)>;
+ }
}
defm : BitwiseVectorOps<v16i8>;
@@ -879,6 +935,11 @@ let Predicates = [FeatureVector] in {
def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>;
+ def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>;
+ def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>;
+ }
// Convert from fixed 64-bit.
def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
@@ -910,6 +971,11 @@ let Predicates = [FeatureVector] in {
def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>;
+ def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>;
+ def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>;
+ }
// Load FP integer.
def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
@@ -917,66 +983,213 @@ let Predicates = [FeatureVector] in {
def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
defm : VectorRounding<VFIDB, v128db>;
defm : VectorRounding<WFIDB, v64db>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
+ def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
+ def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>;
+ defm : VectorRounding<VFISB, v128sb>;
+ defm : VectorRounding<WFISB, v32sb>;
+ defm : VectorRounding<WFIXB, v128xb>;
+ }
// Load lengthened.
def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
- def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
- def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32eb, 2, 8>;
+ def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
+ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ let isAsmParserOnly = 1 in {
+ def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>;
+ def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
+ def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
+ }
+ def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>;
+ def : Pat<(f128 (fpextend (f32 VR32:$src))),
+ (WFLLD (WLDEB VR32:$src))>;
+ }
- // Load rounded,
+ // Load rounded.
def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
- def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
- def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
+ def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
+ def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
- def : FPConversion<WLEDB, fpround, v32eb, v64db, 0, 0>;
+ def : FPConversion<WLEDB, fpround, v32sb, v64db, 0, 0>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ let isAsmParserOnly = 1 in {
+ def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>;
+ def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
+ def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+ }
+ def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>;
+ def : FPConversion<WFLRX, fpround, v64db, v128xb, 0, 0>;
+ def : Pat<(f32 (fpround (f128 VR128:$src))),
+ (WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>;
+ }
+
+ // Maximum.
+ multiclass VectorMax<Instruction insn, TypedReg tr> {
+ def : FPMinMax<insn, fmaxnum, tr, 4>;
+ def : FPMinMax<insn, fmaxnan, tr, 1>;
+ }
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
+ def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
+ v128db, v128db, 3, 0>;
+ def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag,
+ v64db, v64db, 3, 8>;
+ def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb,
+ v128sb, v128sb, 2, 0>;
+ def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag,
+ v32sb, v32sb, 2, 8>;
+ def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag,
+ v128xb, v128xb, 4, 8>;
+ defm : VectorMax<VFMAXDB, v128db>;
+ defm : VectorMax<WFMAXDB, v64db>;
+ defm : VectorMax<VFMAXSB, v128sb>;
+ defm : VectorMax<WFMAXSB, v32sb>;
+ defm : VectorMax<WFMAXXB, v128xb>;
+ }
+
+ // Minimum.
+ multiclass VectorMin<Instruction insn, TypedReg tr> {
+ def : FPMinMax<insn, fminnum, tr, 4>;
+ def : FPMinMax<insn, fminnan, tr, 1>;
+ }
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
+ def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
+ v128db, v128db, 3, 0>;
+ def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag,
+ v64db, v64db, 3, 8>;
+ def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb,
+ v128sb, v128sb, 2, 0>;
+ def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag,
+ v32sb, v32sb, 2, 8>;
+ def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag,
+ v128xb, v128xb, 4, 8>;
+ defm : VectorMin<VFMINDB, v128db>;
+ defm : VectorMin<WFMINDB, v64db>;
+ defm : VectorMin<VFMINSB, v128sb>;
+ defm : VectorMin<WFMINSB, v32sb>;
+ defm : VectorMin<WFMINXB, v128xb>;
+ }
// Multiply.
def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>;
+ def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>;
+ def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>;
+ }
// Multiply and add.
def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>;
+ def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>;
+ def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>;
+ }
// Multiply and subtract.
def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>;
+ def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>;
+ def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>;
+ }
+
+ // Negative multiply and add.
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>;
+ def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>;
+ def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>;
+ def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>;
+ def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>;
+ def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>;
+ }
+
+ // Negative multiply and subtract.
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>;
+ def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>;
+ def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>;
+ def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>;
+ def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>;
+ def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>;
+ }
// Perform sign operation.
def VFPSO : BinaryVRRaFloatGeneric<"vfpso", 0xE7CC>;
def VFPSODB : BinaryVRRa<"vfpsodb", 0xE7CC, null_frag, v128db, v128db, 3, 0>;
def WFPSODB : BinaryVRRa<"wfpsodb", 0xE7CC, null_frag, v64db, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFPSOSB : BinaryVRRa<"vfpsosb", 0xE7CC, null_frag, v128sb, v128sb, 2, 0>;
+ def WFPSOSB : BinaryVRRa<"wfpsosb", 0xE7CC, null_frag, v32sb, v32sb, 2, 8>;
+ def WFPSOXB : BinaryVRRa<"wfpsoxb", 0xE7CC, null_frag, v128xb, v128xb, 4, 8>;
+ }
// Load complement.
def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFLCSB : UnaryVRRa<"vflcsb", 0xE7CC, fneg, v128sb, v128sb, 2, 0, 0>;
+ def WFLCSB : UnaryVRRa<"wflcsb", 0xE7CC, fneg, v32sb, v32sb, 2, 8, 0>;
+ def WFLCXB : UnaryVRRa<"wflcxb", 0xE7CC, fneg, v128xb, v128xb, 4, 8, 0>;
+ }
// Load negative.
def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFLNSB : UnaryVRRa<"vflnsb", 0xE7CC, fnabs, v128sb, v128sb, 2, 0, 1>;
+ def WFLNSB : UnaryVRRa<"wflnsb", 0xE7CC, fnabs, v32sb, v32sb, 2, 8, 1>;
+ def WFLNXB : UnaryVRRa<"wflnxb", 0xE7CC, fnabs, v128xb, v128xb, 4, 8, 1>;
+ }
// Load positive.
def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFLPSB : UnaryVRRa<"vflpsb", 0xE7CC, fabs, v128sb, v128sb, 2, 0, 2>;
+ def WFLPSB : UnaryVRRa<"wflpsb", 0xE7CC, fabs, v32sb, v32sb, 2, 8, 2>;
+ def WFLPXB : UnaryVRRa<"wflpxb", 0xE7CC, fabs, v128xb, v128xb, 4, 8, 2>;
+ }
// Square root.
def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>;
+ def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>;
+ def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>;
+ }
// Subtract.
def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>;
+ def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>;
+ def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>;
+ }
// Test data class immediate.
let Defs = [CC] in {
def VFTCI : BinaryVRIeFloatGeneric<"vftci", 0xE74A>;
def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>;
def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFTCISB : BinaryVRIe<"vftcisb", 0xE74A, z_vftci, v128f, v128sb, 2, 0>;
+ def WFTCISB : BinaryVRIe<"wftcisb", 0xE74A, null_frag, v32f, v32sb, 2, 8>;
+ def WFTCIXB : BinaryVRIe<"wftcixb", 0xE74A, null_frag, v128q, v128xb, 4, 8>;
+ }
}
}
@@ -989,12 +1202,20 @@ let Predicates = [FeatureVector] in {
let Defs = [CC] in {
def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>;
+ def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_fcmp, v128xb, 4>;
+ }
}
// Compare and signal scalar.
let Defs = [CC] in {
def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>;
+ def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, null_frag, v128xb, 4>;
+ }
}
// Compare equal.
@@ -1003,6 +1224,28 @@ let Predicates = [FeatureVector] in {
v128g, v128db, 3, 0>;
defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
v64g, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+ v128f, v128sb, 2, 0>;
+ defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag,
+ v32f, v32sb, 2, 8>;
+ defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag,
+ v128q, v128xb, 4, 8>;
+ }
+
+ // Compare and signal equal.
+ let Predicates = [FeatureVectorEnhancements1] in {
+ defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag,
+ v128g, v128db, 3, 4>;
+ defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag,
+ v64g, v64db, 3, 12>;
+ defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, null_frag, null_frag,
+ v128f, v128sb, 2, 4>;
+ defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag,
+ v32f, v32sb, 2, 12>;
+ defm WFKEXB : BinaryVRRcSPair<"wfkexb", 0xE7E8, null_frag, null_frag,
+ v128q, v128xb, 4, 12>;
+ }
// Compare high.
def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
@@ -1010,6 +1253,28 @@ let Predicates = [FeatureVector] in {
v128g, v128db, 3, 0>;
defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
v64g, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs,
+ v128f, v128sb, 2, 0>;
+ defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag,
+ v32f, v32sb, 2, 8>;
+ defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag,
+ v128q, v128xb, 4, 8>;
+ }
+
+ // Compare and signal high.
+ let Predicates = [FeatureVectorEnhancements1] in {
+ defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag,
+ v128g, v128db, 3, 4>;
+ defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag,
+ v64g, v64db, 3, 12>;
+ defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, null_frag, null_frag,
+ v128f, v128sb, 2, 4>;
+ defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag,
+ v32f, v32sb, 2, 12>;
+ defm WFKHXB : BinaryVRRcSPair<"wfkhxb", 0xE7EB, null_frag, null_frag,
+ v128q, v128xb, 4, 12>;
+ }
// Compare high or equal.
def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
@@ -1017,6 +1282,28 @@ let Predicates = [FeatureVector] in {
v128g, v128db, 3, 0>;
defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
v64g, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+ v128f, v128sb, 2, 0>;
+ defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag,
+ v32f, v32sb, 2, 8>;
+ defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag,
+ v128q, v128xb, 4, 8>;
+ }
+
+ // Compare and signal high or equal.
+ let Predicates = [FeatureVectorEnhancements1] in {
+ defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag,
+ v128g, v128db, 3, 4>;
+ defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag,
+ v64g, v64db, 3, 12>;
+ defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, null_frag, null_frag,
+ v128f, v128sb, 2, 4>;
+ defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag,
+ v32f, v32sb, 2, 12>;
+ defm WFKHEXB : BinaryVRRcSPair<"wfkhexb", 0xE7EA, null_frag, null_frag,
+ v128q, v128xb, 4, 12>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -1028,36 +1315,49 @@ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (f128 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (f128 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (f128 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (f128 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (f128 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (f128 VR128:$src))), (v2f64 VR128:$src)>;
+
+def : Pat<(f128 (bitconvert (v16i8 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v8i16 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v4i32 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v2i64 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v4f32 VR128:$src))), (f128 VR128:$src)>;
+def : Pat<(f128 (bitconvert (v2f64 VR128:$src))), (f128 VR128:$src)>;
//===----------------------------------------------------------------------===//
// Replicating scalars
@@ -1134,6 +1434,20 @@ let AddedComplexity = 4 in {
}
//===----------------------------------------------------------------------===//
+// Support for 128-bit floating-point values in vector registers
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVectorEnhancements1] in {
+ def : Pat<(f128 (load bdxaddr12only:$addr)),
+ (VL bdxaddr12only:$addr)>;
+ def : Pat<(store (f128 VR128:$src), bdxaddr12only:$addr),
+ (VST VR128:$src, bdxaddr12only:$addr)>;
+
+ def : Pat<(f128 fpimm0), (VZERO)>;
+ def : Pat<(f128 fpimmneg0), (WFLNXB (VZERO))>;
+}
+
+//===----------------------------------------------------------------------===//
// String instructions
//===----------------------------------------------------------------------===//
@@ -1202,3 +1516,37 @@ let Predicates = [FeatureVector] in {
defm VSTRCZF : QuaternaryOptVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf,
z_vstrcz_cc, v128f, v128f, 2, 2>;
}
+
+//===----------------------------------------------------------------------===//
+// Packed-decimal instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVectorPackedDecimal] in {
+ def VLIP : BinaryVRIh<"vlip", 0xE649>;
+
+ def VPKZ : BinaryVSI<"vpkz", 0xE634, null_frag, 0>;
+ def VUPKZ : StoreLengthVSI<"vupkz", 0xE63C, null_frag, 0>;
+
+ let Defs = [CC] in {
+ def VCVB : BinaryVRRi<"vcvb", 0xE650, GR32>;
+ def VCVBG : BinaryVRRi<"vcvbg", 0xE652, GR64>;
+ def VCVD : TernaryVRIi<"vcvd", 0xE658, GR32>;
+ def VCVDG : TernaryVRIi<"vcvdg", 0xE65A, GR64>;
+
+ def VAP : QuaternaryVRIf<"vap", 0xE671>;
+ def VSP : QuaternaryVRIf<"vsp", 0xE673>;
+
+ def VMP : QuaternaryVRIf<"vmp", 0xE678>;
+ def VMSP : QuaternaryVRIf<"vmsp", 0xE679>;
+
+ def VDP : QuaternaryVRIf<"vdp", 0xE67A>;
+ def VRP : QuaternaryVRIf<"vrp", 0xE67B>;
+ def VSDP : QuaternaryVRIf<"vsdp", 0xE67E>;
+
+ def VSRP : QuaternaryVRIg<"vsrp", 0xE659>;
+ def VPSOP : QuaternaryVRIg<"vpsop", 0xE65B>;
+
+ def VTP : TestVRRg<"vtp", 0xE65F>;
+ def VCP : CompareVRRh<"vcp", 0xE677>;
+ }
+}
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 9c6d5819f8a7..759a8bb0ce14 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -181,6 +181,7 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
[SDNPInGlue]>;
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
+def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>;
def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>;
def z_sdivrem : SDNode<"SystemZISD::SDIVREM", SDT_ZGR128Binary>;
def z_udivrem : SDNode<"SystemZISD::UDIVREM", SDT_ZGR128Binary>;
@@ -549,6 +550,12 @@ def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(fma node:$src2, node:$src3, (fneg node:$src1))>;
+// Negative fused multiply-add and multiply-subtract.
+def fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fneg (fma node:$src1, node:$src2, node:$src3))>;
+def fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fneg (fms node:$src1, node:$src2, node:$src3))>;
+
// Floating-point negative absolute.
def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
@@ -624,6 +631,19 @@ def z_vllezf64 : PatFrag<(ops node:$addr),
(scalar_to_vector (f64 (load node:$addr))),
(z_vzero))>;
+// Similarly for the high element of a zeroed vector.
+def z_vllezli32 : z_vllez<i32, load, 0>;
+def z_vllezlf32 : PatFrag<(ops node:$addr),
+ (bitconvert
+ (z_merge_high
+ (v2i64
+ (bitconvert
+ (z_merge_high
+ (v4f32 (scalar_to_vector
+ (f32 (load node:$addr)))),
+ (v4f32 (z_vzero))))),
+ (v2i64 (z_vzero))))>;
+
// Store one element of a vector.
class z_vste<ValueType scalartype, SDPatternOperator store>
: PatFrag<(ops node:$vec, node:$addr, node:$index),
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td
index 16a7ed784d70..152521fb66a8 100644
--- a/lib/Target/SystemZ/SystemZPatterns.td
+++ b/lib/Target/SystemZ/SystemZPatterns.td
@@ -167,3 +167,10 @@ class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1,
TypedReg tr2, bits<3> suppress, bits<4> mode>
: Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))),
(insn tr2.op:$vec, suppress, mode)>;
+
+// Use INSN to perform minimum/maximum operation OPERATOR on type TR.
+// FUNCTION is the type of minimum/maximum function to perform.
+class FPMinMax<Instruction insn, SDPatternOperator operator, TypedReg tr,
+ bits<4> function>
+ : Pat<(tr.vt (operator (tr.vt tr.op:$vec1), (tr.vt tr.op:$vec2))),
+ (insn tr.op:$vec1, tr.op:$vec2, function)>;
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
index 1cdc0949ff4a..0dca4582dc0d 100644
--- a/lib/Target/SystemZ/SystemZProcessors.td
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -33,3 +33,6 @@ def : ProcessorModel<"zEC12", ZEC12Model, Arch10SupportedFeatures.List>;
def : ProcessorModel<"arch11", Z13Model, Arch11SupportedFeatures.List>;
def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>;
+def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>;
+def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>;
+
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 36809ea81dc1..52ba1a584017 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -260,10 +260,10 @@ defm VF128 : SystemZRegClass<"VF128",
// All vector registers.
defm VR128 : SystemZRegClass<"VR128",
- [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
- (add (sequence "V%u", 0, 7),
- (sequence "V%u", 16, 31),
- (sequence "V%u", 8, 15))>;
+ [f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (add (sequence "V%u", 0, 7),
+ (sequence "V%u", 16, 31),
+ (sequence "V%u", 8, 15))>;
// Attaches a ValueType to a register operand, to make the instruction
// definitions easier.
@@ -272,7 +272,8 @@ class TypedReg<ValueType vtin, RegisterOperand opin> {
RegisterOperand op = opin;
}
-def v32eb : TypedReg<f32, VR32>;
+def v32f : TypedReg<i32, VR32>;
+def v32sb : TypedReg<f32, VR32>;
def v64g : TypedReg<i64, VR64>;
def v64db : TypedReg<f64, VR64>;
def v128b : TypedReg<v16i8, VR128>;
@@ -280,8 +281,9 @@ def v128h : TypedReg<v8i16, VR128>;
def v128f : TypedReg<v4i32, VR128>;
def v128g : TypedReg<v2i64, VR128>;
def v128q : TypedReg<v16i8, VR128>;
-def v128eb : TypedReg<v4f32, VR128>;
+def v128sb : TypedReg<v4f32, VR128>;
def v128db : TypedReg<v2f64, VR128>;
+def v128xb : TypedReg<f128, VR128>;
def v128any : TypedReg<untyped, VR128>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZSchedule.td b/lib/Target/SystemZ/SystemZSchedule.td
index 1ce0168f95e9..8dba89f70a42 100644
--- a/lib/Target/SystemZ/SystemZSchedule.td
+++ b/lib/Target/SystemZ/SystemZSchedule.td
@@ -59,7 +59,7 @@ def FPU2 : SchedWrite;
def DFU : SchedWrite;
def DFU2 : SchedWrite;
-// Vector sub units (z13)
+// Vector sub units (z13 and later)
def VecBF : SchedWrite;
def VecBF2 : SchedWrite;
def VecDF : SchedWrite;
@@ -75,6 +75,7 @@ def VecXsPm : SchedWrite;
def VBU : SchedWrite;
+include "SystemZScheduleZ14.td"
include "SystemZScheduleZ13.td"
include "SystemZScheduleZEC12.td"
include "SystemZScheduleZ196.td"
diff --git a/lib/Target/SystemZ/SystemZScheduleZ14.td b/lib/Target/SystemZ/SystemZScheduleZ14.td
new file mode 100644
index 000000000000..f11177af91a5
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -0,0 +1,1611 @@
+//-- SystemZScheduleZ14.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Z14 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def Z14Model : SchedMachineModel {
+
+ let UnsupportedFeatures = Arch12UnsupportedFeatures.List;
+
+ let IssueWidth = 8;
+ let MicroOpBufferSize = 60; // Issue queues
+ let LoadLatency = 1; // Optimistic load latency.
+
+ let PostRAScheduler = 1;
+
+ // Extra cycles for a mispredicted branch.
+ let MispredictPenalty = 20;
+}
+
+let SchedModel = Z14Model in {
+
+// These definitions could be put in a subtarget common include file,
+// but it seems the include system in TableGen currently rejects
+// multiple includes of the same file.
+def : WriteRes<GroupAlone, []> {
+ let NumMicroOps = 0;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<BeginGroup, []> {
+ let NumMicroOps = 0;
+ let BeginGroup = 1;
+}
+def : WriteRes<EndGroup, []> {
+ let NumMicroOps = 0;
+ let EndGroup = 1;
+}
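+
+// The Lat2 .. Lat30 classes below name no execution units and carry no
+// micro-ops; they only contribute a Latency value and are combined with the
+// unit writes in the InstRW definitions further down whenever an instruction
+// needs a larger overall latency than the units alone would give.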
+def : WriteRes<Lat2, []> { let Latency = 2; let NumMicroOps = 0;}
+def : WriteRes<Lat3, []> { let Latency = 3; let NumMicroOps = 0;}
+def : WriteRes<Lat4, []> { let Latency = 4; let NumMicroOps = 0;}
+def : WriteRes<Lat5, []> { let Latency = 5; let NumMicroOps = 0;}
+def : WriteRes<Lat6, []> { let Latency = 6; let NumMicroOps = 0;}
+def : WriteRes<Lat7, []> { let Latency = 7; let NumMicroOps = 0;}
+def : WriteRes<Lat8, []> { let Latency = 8; let NumMicroOps = 0;}
+def : WriteRes<Lat9, []> { let Latency = 9; let NumMicroOps = 0;}
+def : WriteRes<Lat10, []> { let Latency = 10; let NumMicroOps = 0;}
+def : WriteRes<Lat11, []> { let Latency = 11; let NumMicroOps = 0;}
+def : WriteRes<Lat12, []> { let Latency = 12; let NumMicroOps = 0;}
+def : WriteRes<Lat15, []> { let Latency = 15; let NumMicroOps = 0;}
+def : WriteRes<Lat20, []> { let Latency = 20; let NumMicroOps = 0;}
+def : WriteRes<Lat30, []> { let Latency = 30; let NumMicroOps = 0;}
+
+// Execution units.
+def Z14_FXaUnit : ProcResource<2>;
+def Z14_FXbUnit : ProcResource<2>;
+def Z14_LSUnit : ProcResource<2>;
+def Z14_VecUnit : ProcResource<2>;
+def Z14_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ }
+def Z14_VBUnit : ProcResource<2>;
+
+// Subtarget specific definitions of scheduling resources.
+def : WriteRes<FXa, [Z14_FXaUnit]> { let Latency = 1; }
+def : WriteRes<FXa2, [Z14_FXaUnit, Z14_FXaUnit]> { let Latency = 2; }
+def : WriteRes<FXb, [Z14_FXbUnit]> { let Latency = 1; }
+def : WriteRes<LSU, [Z14_LSUnit]> { let Latency = 4; }
+def : WriteRes<VecBF, [Z14_VecUnit]> { let Latency = 8; }
+def : WriteRes<VecBF2, [Z14_VecUnit, Z14_VecUnit]> { let Latency = 9; }
+def : WriteRes<VecDF, [Z14_VecUnit]> { let Latency = 8; }
+def : WriteRes<VecDF2, [Z14_VecUnit, Z14_VecUnit]> { let Latency = 9; }
+def : WriteRes<VecDFX, [Z14_VecUnit]> { let Latency = 1; }
+def : WriteRes<VecDFX2, [Z14_VecUnit, Z14_VecUnit]> { let Latency = 2; }
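+// The FP divide / square root unit is blocking (BufferSize = 1 above);
+// repeating Z14_VecFPdUnit in the list below appears to be a way of keeping
+// that unit occupied for roughly the full 30-cycle latency, so that
+// independent divide/sqrt operations cannot overlap.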
+def : WriteRes<VecFPd, [Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit,
+ Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit,
+ Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit,
+ Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit,
+ Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit,
+ Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit,
+ Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit,
+ Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit,
+ Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit,
+ Z14_VecFPdUnit, Z14_VecFPdUnit, Z14_VecFPdUnit]>
+ { let Latency = 30; }
+def : WriteRes<VecMul, [Z14_VecUnit]> { let Latency = 5; }
+def : WriteRes<VecStr, [Z14_VecUnit]> { let Latency = 4; }
+def : WriteRes<VecXsPm, [Z14_VecUnit]> { let Latency = 3; }
+def : WriteRes<VBU, [Z14_VBUnit]>; // Virtual Branching Unit
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs have been used in order to preserve the
+// readability of the InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
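+//
+// As a purely illustrative example (the mnemonic is made up and not defined
+// in this file), an instruction "FOO" using one FXa unit plus one LSU slot
+// with an overall latency of five cycles would be described as:
+//   def : InstRW<[FXa, LSU, Lat5], (instregex "FOO$")>;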
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Branch
+def : InstRW<[VBU], (instregex "(Call)?BRC(L)?(Asm.*)?$")>;
+def : InstRW<[VBU], (instregex "(Call)?J(G)?(Asm.*)?$")>;
+def : InstRW<[FXb], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
+def : InstRW<[FXb], (instregex "(Call)?B(R)?(Asm.*)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "BI(C)?(Asm.*)?$")>;
+def : InstRW<[FXa, EndGroup], (instregex "BRCT(G)?$")>;
+def : InstRW<[FXb, FXa, Lat2, GroupAlone], (instregex "BRCTH$")>;
+def : InstRW<[FXb, FXa, Lat2, GroupAlone], (instregex "BCT(G)?(R)?$")>;
+def : InstRW<[FXa, FXa, FXb, FXb, Lat4, GroupAlone],
+ (instregex "B(R)?X(H|L).*$")>;
+
+// Compare and branch
+def : InstRW<[FXb], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>;
+def : InstRW<[FXb, FXb, Lat2, GroupAlone],
+ (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Trap
+def : InstRW<[VBU], (instregex "(Cond)?Trap$")>;
+
+// Compare and trap
+def : InstRW<[FXb], (instregex "C(G)?(I|R)T(Asm.*)?$")>;
+def : InstRW<[FXb], (instregex "CL(G)?RT(Asm.*)?$")>;
+def : InstRW<[FXb], (instregex "CL(F|G)IT(Asm.*)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CL(G)?T(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Call
+def : InstRW<[VBU, FXa, FXa, Lat3, GroupAlone], (instregex "(Call)?BRAS$")>;
+def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "(Call)?BAS(R)?$")>;
+def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[FXb, EndGroup], (instregex "Return$")>;
+def : InstRW<[FXb], (instregex "CondReturn$")>;
+
+//===----------------------------------------------------------------------===//
+// Select instructions
+//===----------------------------------------------------------------------===//
+
+// Select pseudo
+def : InstRW<[FXa], (instregex "Select(32|64|32Mux)$")>;
+
+// CondStore pseudos
+def : InstRW<[FXa], (instregex "CondStore16(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore16Mux(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore32(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore32Mux(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore64(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore8(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore8Mux(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Moves
+def : InstRW<[FXb, LSU, Lat5], (instregex "MV(G|H)?HI$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[FXb, LSU, LSU, LSU, Lat8, GroupAlone], (instregex "MVC$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCL(E|U)?$")>;
+
+// Pseudo -> reg move
+def : InstRW<[FXa], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[FXa], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[FXa], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[FXa], (instregex "REG_SEQUENCE$")>;
+def : InstRW<[FXa], (instregex "SUBREG_TO_REG$")>;
+
+// Loads
+def : InstRW<[LSU], (instregex "L(Y|FH|RL|Mux|CBB)?$")>;
+def : InstRW<[LSU], (instregex "LG(RL)?$")>;
+def : InstRW<[LSU], (instregex "L128$")>;
+
+def : InstRW<[FXa], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[FXa], (instregex "LLIL(F|H|L)$")>;
+
+def : InstRW<[FXa], (instregex "LG(F|H)I$")>;
+def : InstRW<[FXa], (instregex "LHI(Mux)?$")>;
+def : InstRW<[FXa], (instregex "LR(Mux)?$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSU], (instregex "LZR(F|G)$")>;
+
+// Load and trap
+def : InstRW<[FXb, LSU, Lat5], (instregex "L(FH|G)?AT$")>;
+
+// Load and test
+def : InstRW<[FXa, LSU, Lat5], (instregex "LT(G)?$")>;
+def : InstRW<[FXa], (instregex "LT(G)?R$")>;
+
+// Stores
+def : InstRW<[FXb, LSU, Lat5], (instregex "STG(RL)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "ST128$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "ST(Y|FH|RL|Mux)?$")>;
+
+// String moves.
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>;
+def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>;
+def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>;
+def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "L(B|H|G)R$")>;
+def : InstRW<[FXa], (instregex "LG(B|H|F)R$")>;
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "LTGF$")>;
+def : InstRW<[FXa], (instregex "LTGFR$")>;
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "LB(H|Mux)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LH(Y)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LH(H|Mux|RL)$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LG(B|H|F)$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LG(H|F)RL$")>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[FXa], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[FXa], (instregex "LLG(C|H|F|T)R$")>;
+def : InstRW<[LSU], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSU], (instregex "LLH(Mux)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LL(C|H)H$")>;
+def : InstRW<[LSU], (instregex "LLHRL$")>;
+def : InstRW<[LSU], (instregex "LLG(C|H|F|T|HRL|FRL)$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSU], (instregex "LLZRGF$")>;
+
+// Load and trap
+def : InstRW<[FXb, LSU, Lat5], (instregex "LLG(F|T)?AT$")>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat5], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STCM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
+ (instregex "LM(H|Y|G)?$")>;
+
+// Load multiple disjoint
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "LMD$")>;
+
+// Store multiple (estimated average of ceil(5/2) FXb ops)
+def : InstRW<[LSU, LSU, FXb, FXb, FXb, Lat10,
+ GroupAlone], (instregex "STM(G|H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "LRV(G)?R$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LRV(G|H)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STRV(G|H)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCIN$")>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "LA(Y|RL)?$")>;
+
+// Load the Global Offset Table address ( -> larl )
+def : InstRW<[FXa], (instregex "GOT$")>;
+
+//===----------------------------------------------------------------------===//
+// Absolute and Negation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "LP(G)?R$")>;
+def : InstRW<[FXa, FXa, Lat2, BeginGroup], (instregex "L(N|P)GFR$")>;
+def : InstRW<[FXa], (instregex "LN(R|GR)$")>;
+def : InstRW<[FXa], (instregex "LC(R|GR)$")>;
+def : InstRW<[FXa, FXa, Lat2, BeginGroup], (instregex "LCGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "IC(Y)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "IC32(Y)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "ICM(H|Y)?$")>;
+def : InstRW<[FXa], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[FXa], (instregex "IIHF(64)?$")>;
+def : InstRW<[FXa], (instregex "IIHH(64)?$")>;
+def : InstRW<[FXa], (instregex "IIHL(64)?$")>;
+def : InstRW<[FXa], (instregex "IILF(64)?$")>;
+def : InstRW<[FXa], (instregex "IILH(64)?$")>;
+def : InstRW<[FXa], (instregex "IILL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "A(Y)?$")>;
+def : InstRW<[FXa, LSU, Lat6], (instregex "AH(Y)?$")>;
+def : InstRW<[FXa], (instregex "AIH$")>;
+def : InstRW<[FXa], (instregex "AFI(Mux)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "AG$")>;
+def : InstRW<[FXa], (instregex "AGFI$")>;
+def : InstRW<[FXa], (instregex "AGHI(K)?$")>;
+def : InstRW<[FXa], (instregex "AGR(K)?$")>;
+def : InstRW<[FXa], (instregex "AHI(K)?$")>;
+def : InstRW<[FXa], (instregex "AHIMux(K)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "AL(Y)?$")>;
+def : InstRW<[FXa], (instregex "AL(FI|HSIK)$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "ALG(F)?$")>;
+def : InstRW<[FXa], (instregex "ALGHSIK$")>;
+def : InstRW<[FXa], (instregex "ALGF(I|R)$")>;
+def : InstRW<[FXa], (instregex "ALGR(K)?$")>;
+def : InstRW<[FXa], (instregex "ALR(K)?$")>;
+def : InstRW<[FXa], (instregex "AR(K)?$")>;
+def : InstRW<[FXa], (instregex "A(L)?HHHR$")>;
+def : InstRW<[FXa, Lat2], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXa], (instregex "ALSIH(N)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "A(L)?(G)?SI$")>;
+
+// Logical addition with carry
+def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "ALC(G)?$")>;
+def : InstRW<[FXa, Lat2, GroupAlone], (instregex "ALC(G)?R$")>;
+
+// Add with sign extension (16/32 -> 64)
+def : InstRW<[FXa, LSU, Lat6], (instregex "AG(F|H)$")>;
+def : InstRW<[FXa, Lat2], (instregex "AGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "S(G|Y)?$")>;
+def : InstRW<[FXa, LSU, Lat6], (instregex "SH(Y)?$")>;
+def : InstRW<[FXa], (instregex "SGR(K)?$")>;
+def : InstRW<[FXa], (instregex "SLFI$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "SL(G|GF|Y)?$")>;
+def : InstRW<[FXa], (instregex "SLGF(I|R)$")>;
+def : InstRW<[FXa], (instregex "SLGR(K)?$")>;
+def : InstRW<[FXa], (instregex "SLR(K)?$")>;
+def : InstRW<[FXa], (instregex "SR(K)?$")>;
+def : InstRW<[FXa], (instregex "S(L)?HHHR$")>;
+def : InstRW<[FXa, Lat2], (instregex "S(L)?HHLR$")>;
+
+// Subtraction with borrow
+def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "SLB(G)?$")>;
+def : InstRW<[FXa, Lat2, GroupAlone], (instregex "SLB(G)?R$")>;
+
+// Subtraction with sign extension (16/32 -> 64)
+def : InstRW<[FXa, LSU, Lat6], (instregex "SG(F|H)$")>;
+def : InstRW<[FXa, Lat2], (instregex "SGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "N(G|Y)?$")>;
+def : InstRW<[FXa], (instregex "NGR(K)?$")>;
+def : InstRW<[FXa], (instregex "NI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "NI(Y)?$")>;
+def : InstRW<[FXa], (instregex "NIHF(64)?$")>;
+def : InstRW<[FXa], (instregex "NIHH(64)?$")>;
+def : InstRW<[FXa], (instregex "NIHL(64)?$")>;
+def : InstRW<[FXa], (instregex "NILF(64)?$")>;
+def : InstRW<[FXa], (instregex "NILH(64)?$")>;
+def : InstRW<[FXa], (instregex "NILL(64)?$")>;
+def : InstRW<[FXa], (instregex "NR(K)?$")>;
+def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "NC$")>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "O(G|Y)?$")>;
+def : InstRW<[FXa], (instregex "OGR(K)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "OI(Y)?$")>;
+def : InstRW<[FXa], (instregex "OI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXa], (instregex "OIHF(64)?$")>;
+def : InstRW<[FXa], (instregex "OIHH(64)?$")>;
+def : InstRW<[FXa], (instregex "OIHL(64)?$")>;
+def : InstRW<[FXa], (instregex "OILF(64)?$")>;
+def : InstRW<[FXa], (instregex "OILH(64)?$")>;
+def : InstRW<[FXa], (instregex "OILL(64)?$")>;
+def : InstRW<[FXa], (instregex "OR(K)?$")>;
+def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "OC$")>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "X(G|Y)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "XI(Y)?$")>;
+def : InstRW<[FXa], (instregex "XIFMux$")>;
+def : InstRW<[FXa], (instregex "XGR(K)?$")>;
+def : InstRW<[FXa], (instregex "XIHF(64)?$")>;
+def : InstRW<[FXa], (instregex "XILF(64)?$")>;
+def : InstRW<[FXa], (instregex "XR(K)?$")>;
+def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "XC$")>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat9], (instregex "MS(GF|Y)?$")>;
+def : InstRW<[FXa, Lat5], (instregex "MS(R|FI)$")>;
+def : InstRW<[FXa, LSU, Lat11], (instregex "MSG$")>;
+def : InstRW<[FXa, Lat7], (instregex "MSGR$")>;
+def : InstRW<[FXa, Lat5], (instregex "MSGF(I|R)$")>;
+def : InstRW<[FXa2, LSU, Lat12, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[FXa2, Lat8, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[FXa, Lat4], (instregex "MGHI$")>;
+def : InstRW<[FXa, Lat4], (instregex "MHI$")>;
+def : InstRW<[FXa, LSU, Lat8], (instregex "MH(Y)?$")>;
+def : InstRW<[FXa2, Lat6, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[FXa2, LSU, Lat10, GroupAlone], (instregex "M(FY|L)?$")>;
+def : InstRW<[FXa, LSU, Lat8], (instregex "MGH$")>;
+def : InstRW<[FXa, LSU, Lat12, GroupAlone], (instregex "MG$")>;
+def : InstRW<[FXa, Lat8, GroupAlone], (instregex "MGRK$")>;
+def : InstRW<[FXa, LSU, Lat9, GroupAlone], (instregex "MSC$")>;
+def : InstRW<[FXa, LSU, Lat11, GroupAlone], (instregex "MSGC$")>;
+def : InstRW<[FXa, Lat5], (instregex "MSRKC$")>;
+def : InstRW<[FXa, Lat7], (instregex "MSGRKC$")>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DR$")>;
+def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "D$")>;
+def : InstRW<[FXa2, Lat30, GroupAlone], (instregex "DSG(F)?R$")>;
+def : InstRW<[LSU, FXa2, Lat30, GroupAlone], (instregex "DSG(F)?$")>;
+def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DLR$")>;
+def : InstRW<[FXa2, FXa2, Lat30, GroupAlone], (instregex "DLGR$")>;
+def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "DL(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "SLL(G|K)?$")>;
+def : InstRW<[FXa], (instregex "SRL(G|K)?$")>;
+def : InstRW<[FXa], (instregex "SRA(G|K)?$")>;
+def : InstRW<[FXa], (instregex "SLA(G|K)?$")>;
+def : InstRW<[FXa, FXa, FXa, FXa, LSU, Lat8, GroupAlone],
+ (instregex "S(L|R)D(A|L)$")>;
+
+// Rotate
+def : InstRW<[FXa, LSU, Lat6], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[FXa], (instregex "RISBG(N|32)?$")>;
+def : InstRW<[FXa], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[FXa], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[FXa], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[FXa, FXa, Lat2, BeginGroup], (instregex "R(N|O|X)SBG$")>;
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat5], (instregex "C(G|Y|Mux|RL)?$")>;
+def : InstRW<[FXb], (instregex "C(F|H)I(Mux)?$")>;
+def : InstRW<[FXb], (instregex "CG(F|H)I$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CG(HSI|RL)$")>;
+def : InstRW<[FXb], (instregex "C(G)?R$")>;
+def : InstRW<[FXb], (instregex "CIH$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CH(F|SI)$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CL(Y|Mux|FHSI)?$")>;
+def : InstRW<[FXb], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLG(HRL|HSI)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLGF(RL)?$")>;
+def : InstRW<[FXb], (instregex "CLGF(I|R)$")>;
+def : InstRW<[FXb], (instregex "CLGR$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLGRL$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLH(F|RL|HSI)$")>;
+def : InstRW<[FXb], (instregex "CLIH$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLI(Y)?$")>;
+def : InstRW<[FXb], (instregex "CLR$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLRL$")>;
+def : InstRW<[FXb], (instregex "C(L)?HHR$")>;
+def : InstRW<[FXb, Lat2], (instregex "C(L)?HLR$")>;
+
+// Compare halfword
+def : InstRW<[FXb, LSU, Lat6], (instregex "CH(Y|RL)?$")>;
+def : InstRW<[FXb, LSU, Lat6], (instregex "CGH(RL)?$")>;
+def : InstRW<[FXa, FXb, LSU, Lat6, BeginGroup], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[FXb, LSU, Lat6], (instregex "CGF(RL)?$")>;
+def : InstRW<[FXb, Lat2], (instregex "CGFR$")>;
+
+// Compare logical character
+def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "CLC$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLCL(E|U)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLST$")>;
+
+// Test under mask
+def : InstRW<[FXb, LSU, Lat5], (instregex "TM(Y)?$")>;
+def : InstRW<[FXb], (instregex "TM(H|L)Mux$")>;
+def : InstRW<[FXb], (instregex "TMHH(64)?$")>;
+def : InstRW<[FXb], (instregex "TMHL(64)?$")>;
+def : InstRW<[FXb], (instregex "TMLH(64)?$")>;
+def : InstRW<[FXb], (instregex "TMLL(64)?$")>;
+
+// Compare logical characters under mask
+def : InstRW<[FXb, LSU, Lat6], (instregex "CLM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Prefetch and execution hint
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "PFD(RL)?$")>;
+def : InstRW<[FXb, Lat2], (instregex "BPP$")>;
+def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>;
+def : InstRW<[FXb], (instregex "NIAI$")>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, EndGroup], (instregex "Serialize$")>;
+
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAA(G)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAAL(G)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAN(G)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAO(G)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAX(G)?$")>;
+
+// Test and set
+def : InstRW<[FXb, LSU, Lat5, EndGroup], (instregex "TS$")>;
+
+// Compare and swap
+def : InstRW<[FXa, FXb, LSU, Lat6, GroupAlone], (instregex "CS(G|Y)?$")>;
+
+// Compare double and swap
+def : InstRW<[FXa, FXa, FXb, FXb, FXa, LSU, Lat10, GroupAlone],
+ (instregex "CDS(Y)?$")>;
+def : InstRW<[FXa, FXa, FXb, FXb, LSU, FXb, FXb, LSU, LSU, Lat20, GroupAlone],
+ (instregex "CDSG$")>;
+
+// Compare and swap and store
+def : InstRW<[FXa, LSU, Lat30], (instregex "CSST$")>;
+
+// Perform locked operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>;
+
+// Load/store pair from/to quadword
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPQ$")>;
+def : InstRW<[FXb, FXb, LSU, Lat6, GroupAlone], (instregex "STPQ$")>;
+
+// Load pair disjoint
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Translate and convert
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>;
+def : InstRW<[FXa, FXa, FXa, LSU, LSU, Lat30, GroupAlone], (instregex "TRT$")>;
+def : InstRW<[FXa, LSU, Lat30], (instregex "TRTR$")>;
+def : InstRW<[FXa, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[FXa, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[FXa, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Message-security assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, Lat30], (instregex "KM(C|F|O|CTR|A)?$")>;
+def : InstRW<[FXa, Lat30], (instregex "(KIMD|KLMD|KMAC)$")>;
+def : InstRW<[FXa, Lat30], (instregex "(PCC|PPNO|PRNO)$")>;
+
+//===----------------------------------------------------------------------===//
+// Guarded storage
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "LGG$")>;
+def : InstRW<[LSU, Lat5], (instregex "LLGFSG$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)GSC$")>;
+
+//===----------------------------------------------------------------------===//
+// Decimal arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, VecDF, VecDF, LSU, LSU, Lat30, GroupAlone],
+ (instregex "CVBG$")>;
+def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>;
+def : InstRW<[FXb, FXb, FXb, VecDF2, VecDF2, LSU, Lat30, GroupAlone],
+ (instregex "CVDG$")>;
+def : InstRW<[FXb, VecDF, FXb, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
+def : InstRW<[LSU, Lat12, GroupAlone], (instregex "UNPK(A|U)$")>;
+def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "UNPK$")>;
+
+def : InstRW<[FXb, VecDFX, LSU, LSU, LSU, Lat9, GroupAlone],
+ (instregex "(A|S|ZA)P$")>;
+def : InstRW<[FXb, VecDFX2, VecDFX2, LSU, LSU, LSU, Lat30, GroupAlone],
+ (instregex "(M|D)P$")>;
+def : InstRW<[FXb, VecDFX, VecDFX, LSU, LSU, Lat15, GroupAlone],
+ (instregex "SRP$")>;
+def : InstRW<[VecDFX, LSU, LSU, Lat5, GroupAlone], (instregex "CP$")>;
+def : InstRW<[VecDFX, LSU, Lat4, BeginGroup], (instregex "TP$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Extract/set/copy access register
+def : InstRW<[LSU], (instregex "(EAR|SAR|CPYA)$")>;
+
+// Load address extended
+def : InstRW<[LSU, FXa, Lat5, BeginGroup], (instregex "LAE(Y)?$")>;
+
+// Load/store access multiple (not modeled precisely)
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)AM(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Insert Program Mask
+def : InstRW<[FXa, Lat3, EndGroup], (instregex "IPM$")>;
+
+// Set Program Mask
+def : InstRW<[LSU, EndGroup], (instregex "SPM$")>;
+
+// Branch and link
+def : InstRW<[FXa, FXa, FXb, Lat5, GroupAlone], (instregex "BAL(R)?$")>;
+
+// Test addressing mode
+def : InstRW<[FXb], (instregex "TAM$")>;
+
+// Set addressing mode
+def : InstRW<[FXb, Lat2, EndGroup], (instregex "SAM(24|31|64)$")>;
+
+// Branch (and save) and set mode.
+def : InstRW<[FXa, FXb, Lat2, GroupAlone], (instregex "BSM$")>;
+def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "BASSM$")>;
+
+//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+// Transaction begin
+def : InstRW<[LSU, LSU, FXb, FXb, FXb, FXb, FXb, Lat15, GroupAlone],
+ (instregex "TBEGIN(C|_nofloat)?$")>;
+
+// Transaction end
+def : InstRW<[FXb, GroupAlone], (instregex "TEND$")>;
+
+// Transaction abort
+def : InstRW<[LSU, GroupAlone], (instregex "TABORT$")>;
+
+// Extract Transaction Nesting Depth
+def : InstRW<[FXa], (instregex "ETND$")>;
+
+// Nontransactional store
+def : InstRW<[FXb, LSU, Lat5], (instregex "NTSTG$")>;
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "PPA$")>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one
+def : InstRW<[FXa, FXa, Lat4, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[FXa, Lat3], (instregex "POPCNT$")>;
+
+// Extend
+def : InstRW<[FXa], (instregex "AEXT128$")>;
+def : InstRW<[FXa], (instregex "ZEXT128$")>;
+
+// String instructions
+def : InstRW<[FXa, LSU, Lat30], (instregex "SRST$")>;
+def : InstRW<[FXa, Lat30], (instregex "SRSTU$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>;
+
+// Various complex instructions
+def : InstRW<[LSU, Lat30], (instregex "CFC$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "UPT$")>;
+def : InstRW<[LSU, Lat30], (instregex "CKSM$")>;
+def : InstRW<[FXa, Lat30], (instregex "CMPSC$")>;
+
+// Execute
+def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// An "empty" sched-class will be assigned instead of the "invalid sched-class".
+// getNumDecoderSlots() will then return 1 instead of 0.
+def : InstRW<[], (instregex "Insn.*")>;
+
+
+// ----------------------------- Floating point ----------------------------- //
+
+//===----------------------------------------------------------------------===//
+// FP: Select instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "Select(F32|F64|F128|VR128)$")>;
+def : InstRW<[FXa], (instregex "CondStoreF32(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStoreF64(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero
+def : InstRW<[FXb], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[FXb, FXb, Lat2, BeginGroup], (instregex "LZXR$")>;
+
+// Load
+def : InstRW<[VecXsPm], (instregex "LER$")>;
+def : InstRW<[FXb], (instregex "LD(R|R32|GR)$")>;
+def : InstRW<[FXb, Lat3], (instregex "LGDR$")>;
+def : InstRW<[FXb, FXb, Lat2, GroupAlone], (instregex "LXR$")>;
+
+// Load and Test
+def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)BR$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "LTEBRCompare(_VecPseudo)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "LTDBRCompare(_VecPseudo)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXBR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone],
+ (instregex "LTXBRCompare(_VecPseudo)?$")>;
+
+// Copy sign
+def : InstRW<[VecXsPm], (instregex "CPSDRd(d|s)$")>;
+def : InstRW<[VecXsPm], (instregex "CPSDRs(d|s)$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Load instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm, LSU, Lat7], (instregex "LE(Y)?$")>;
+def : InstRW<[LSU], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSU], (instregex "LX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Store instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat7], (instregex "STD(Y)?$")>;
+def : InstRW<[FXb, LSU, Lat7], (instregex "STE(Y)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[VecBF], (instregex "LEDBR(A)?$")>;
+def : InstRW<[VecDF, VecDF, Lat20], (instregex "LEXBR(A)?$")>;
+def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXBR(A)?$")>;
+
+// Load lengthened
+def : InstRW<[VecBF, LSU, Lat12], (instregex "LDEB$")>;
+def : InstRW<[VecBF], (instregex "LDEBR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12 , GroupAlone], (instregex "LX(D|E)B$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)BR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CE(F|G)BR(A)?$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)BR(A)?$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CEL(F|G)BR$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CDL(F|G)BR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CF(E|D)BR(A)?$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)BR(A)?$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XBR(A)?$")>;
+def : InstRW<[FXb, VecBF, Lat11, GroupAlone], (instregex "CLFEBR$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CLFDBR$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "CL(F|G)XBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DBR$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)EBR$")>;
+def : InstRW<[FXb], (instregex "LCDFR(_32)?$")>;
+def : InstRW<[FXb], (instregex "LNDFR(_32)?$")>;
+def : InstRW<[FXb], (instregex "LPDFR(_32)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Square root
+def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)B$")>;
+def : InstRW<[VecFPd], (instregex "SQ(E|D)BR$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXBR$")>;
+
+// Load FP integer
+def : InstRW<[VecBF], (instregex "FIEBR(A)?$")>;
+def : InstRW<[VecBF], (instregex "FIDBR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D)B$")>;
+def : InstRW<[VecBF], (instregex "A(E|D)BR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D)B$")>;
+def : InstRW<[VecBF], (instregex "S(E|D)BR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|EE)B$")>;
+def : InstRW<[VecBF], (instregex "M(D|DE|EE)BR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXDB$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)EBR$")>;
+def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
+def : InstRW<[VecBF], (instregex "M(A|S)DBR$")>;
+
+// Division
+def : InstRW<[VecFPd, LSU], (instregex "D(E|D)B$")>;
+def : InstRW<[VecFPd], (instregex "D(E|D)BR$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXBR$")>;
+
+// Divide to integer
+def : InstRW<[VecFPd, Lat30], (instregex "DI(E|D)BR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[VecXsPm, LSU, Lat8], (instregex "(K|C)(E|D)B$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "(K|C)(E|D)BR?$")>;
+def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "(K|C)XBR$")>;
+
+// Test Data Class
+def : InstRW<[LSU, VecXsPm, Lat9], (instregex "TC(E|D)B$")>;
+def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "TCXB$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat4, GroupAlone], (instregex "EFPC$")>;
+def : InstRW<[FXb, LSU, Lat5, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>;
+def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>;
+def : InstRW<[FXa, Lat30], (instregex "SFASR$")>;
+def : InstRW<[FXa, LSU, Lat30], (instregex "LFAS$")>;
+def : InstRW<[FXb, Lat3, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)R$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[VecBF], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[VecBF], (instregex "LEXR$")>;
+def : InstRW<[VecDF2], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSU], (instregex "LDE$")>;
+def : InstRW<[FXb], (instregex "LDER$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "LX(D|E)$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)R$")>;
+
+// Convert from fixed
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CE(F|G)R$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)R$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CF(E|D)R$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)R$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[VecBF], (instregex "THD(E)?R$")>;
+def : InstRW<[VecBF], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DR$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)ER$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[VecBF], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)$")>;
+def : InstRW<[VecFPd], (instregex "SQ(E|D)R$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[VecBF], (instregex "FIER$")>;
+def : InstRW<[VecBF], (instregex "FIDR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D|U|W)$")>;
+def : InstRW<[VecBF], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D|U|W)$")>;
+def : InstRW<[VecBF], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[VecBF], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXD$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY$")>;
+def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MYR$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "MY(H|L)R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)ER$")>;
+def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)DR$")>;
+def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "MAY(H|L)R$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAYR$")>;
+
+// Division
+def : InstRW<[VecFPd, LSU], (instregex "D(E|D)$")>;
+def : InstRW<[VecFPd], (instregex "D(E|D)R$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[VecBF, LSU, Lat12], (instregex "C(E|D)$")>;
+def : InstRW<[VecBF], (instregex "C(E|D)R$")>;
+def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[VecDF], (instregex "LTDTR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[VecDF, Lat15], (instregex "LEDTR$")>;
+def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[VecDF], (instregex "LDETR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CD(F|G)TR(A)?$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CDL(F|G)TR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CXL(F|G)TR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[FXb, VecDF, Lat9, BeginGroup], (instregex "CD(S|U)TR$")>;
+def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "C(S|U)DTR$")>;
+def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "C(S|U)XTR$")>;
+
+// Convert from / to zoned
+def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDZT$")>;
+def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CZDT$")>;
+def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CZXT$")>;
+
+// Convert from / to packed
+def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDPT$")>;
+def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXPT$")>;
+def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CPDT$")>;
+def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CPXT$")>;
+
+// Perform floating-point operation
+def : InstRW<[FXb, Lat30], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[VecDF], (instregex "FIDTR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEDTR$")>;
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "ESDTR$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat15, BeginGroup], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[VecDF], (instregex "ADTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[VecDF], (instregex "SDTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[VecDF, Lat30], (instregex "MDTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[VecDF, Lat30], (instregex "DDTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[VecDF], (instregex "QADTR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "RRDTR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[LSU, VecDF, Lat11, GroupAlone], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "IEDTR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[VecDF], (instregex "(K|C)DTR$")>;
+def : InstRW<[VecDF, VecDF, Lat11, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[VecDF], (instregex "CEDTR$")>;
+def : InstRW<[VecDF], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[LSU, VecDF, Lat11], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+
+// --------------------------------- Vector --------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// Vector: Move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "VLR(32|64)?$")>;
+def : InstRW<[FXb, Lat4], (instregex "VLGV(B|F|G|H)?$")>;
+def : InstRW<[FXb], (instregex "VLVG(B|F|G|H)?$")>;
+def : InstRW<[FXb, Lat2], (instregex "VLVGP(32)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Immediate instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VZERO$")>;
+def : InstRW<[VecXsPm], (instregex "VONE$")>;
+def : InstRW<[VecXsPm], (instregex "VGBM$")>;
+def : InstRW<[VecXsPm], (instregex "VGM(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VREPI(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Loads
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "VL(L|BB)?$")>;
+def : InstRW<[LSU], (instregex "VL(32|64)$")>;
+def : InstRW<[LSU], (instregex "VLLEZ(B|F|G|H|LF)?$")>;
+def : InstRW<[LSU], (instregex "VLREP(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, LSU, Lat7], (instregex "VLE(B|F|G|H)$")>;
+def : InstRW<[FXb, LSU, VecXsPm, Lat11, BeginGroup], (instregex "VGE(F|G)$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
+ (instregex "VLM$")>;
+def : InstRW<[LSU, Lat5], (instregex "VLRL(R)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Stores
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat8], (instregex "VST(L|32|64)?$")>;
+def : InstRW<[FXb, LSU, Lat8], (instregex "VSTE(F|G)$")>;
+def : InstRW<[FXb, LSU, VecXsPm, Lat11, BeginGroup], (instregex "VSTE(B|H)$")>;
+def : InstRW<[LSU, LSU, FXb, FXb, FXb, FXb, FXb, Lat20, GroupAlone],
+ (instregex "VSTM$")>;
+def : InstRW<[FXb, FXb, LSU, Lat12, BeginGroup], (instregex "VSCE(F|G)$")>;
+def : InstRW<[FXb, LSU, Lat8], (instregex "VSTRL(R)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Selects and permutes
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VMRH(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMRL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VPERM$")>;
+def : InstRW<[VecXsPm], (instregex "VPDI$")>;
+def : InstRW<[VecXsPm], (instregex "VBPERM$")>;
+def : InstRW<[VecXsPm], (instregex "VREP(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VSEL$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Widening and narrowing
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VPK(F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VPKS(F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VPKS(F|G|H)S$")>;
+def : InstRW<[VecXsPm], (instregex "VPKLS(F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VPKLS(F|G|H)S$")>;
+def : InstRW<[VecXsPm], (instregex "VSEG(B|F|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VUPH(B|F|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VUPL(B|F)?$")>;
+def : InstRW<[VecXsPm], (instregex "VUPLH(B|F|H|W)?$")>;
+def : InstRW<[VecXsPm], (instregex "VUPLL(B|F|H)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[VecXsPm], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[VecXsPm], (instregex "VAVG(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VAVGL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VN(C|O|N|X)?$")>;
+def : InstRW<[VecXsPm], (instregex "VO(C)?$")>;
+def : InstRW<[VecMul], (instregex "VCKSM$")>;
+def : InstRW<[VecXsPm], (instregex "VCLZ(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VCTZ(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VX$")>;
+def : InstRW<[VecMul], (instregex "VGFM?$")>;
+def : InstRW<[VecMul], (instregex "VGFMA(B|F|G|H)?$")>;
+def : InstRW<[VecMul], (instregex "VGFM(B|F|G|H)$")>;
+def : InstRW<[VecXsPm], (instregex "VLC(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VLP(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMX(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMXL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMN(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMNL(B|F|G|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMAL(B|F)?$")>;
+def : InstRW<[VecMul], (instregex "VMALE(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMALH(B|F|H|W)?$")>;
+def : InstRW<[VecMul], (instregex "VMALO(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMAO(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMAE(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMAH(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VME(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMH(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VML(B|F)?$")>;
+def : InstRW<[VecMul], (instregex "VMLE(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMLH(B|F|H|W)?$")>;
+def : InstRW<[VecMul], (instregex "VMLO(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMO(B|F|H)?$")>;
+def : InstRW<[VecBF2], (instregex "VMSL(G)?$")>;
+
+def : InstRW<[VecXsPm], (instregex "VPOPCT(B|F|G|H)?$")>;
+
+def : InstRW<[VecXsPm], (instregex "VERLL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VERLLV(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VERIM(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESLV(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESRA(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESRAV(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESRL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESRLV(B|F|G|H)?$")>;
+
+def : InstRW<[VecXsPm], (instregex "VSL(DB)?$")>;
+def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSLB$")>;
+def : InstRW<[VecXsPm], (instregex "VSR(A|L)$")>;
+def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSR(A|L)B$")>;
+
+def : InstRW<[VecXsPm], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>;
+def : InstRW<[VecXsPm], (instregex "VSCBI(B|F|G|H|Q)?$")>;
+def : InstRW<[VecXsPm], (instregex "VS(F|G|H|Q)?$")>;
+
+def : InstRW<[VecMul], (instregex "VSUM(B|H)?$")>;
+def : InstRW<[VecMul], (instregex "VSUMG(F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VSUMQ(F|G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm, Lat4], (instregex "VEC(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VECL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VCEQ(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VCEQ(B|F|G|H)S$")>;
+def : InstRW<[VecXsPm], (instregex "VCH(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VCH(B|F|G|H)S$")>;
+def : InstRW<[VecXsPm], (instregex "VCHL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VCHL(B|F|G|H)S$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VTM$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point arithmetic
+//===----------------------------------------------------------------------===//
+
+// Conversion and rounding
+def : InstRW<[VecBF], (instregex "VCD(L)?G$")>;
+def : InstRW<[VecBF], (instregex "VCD(L)?GB$")>;
+def : InstRW<[VecBF], (instregex "WCD(L)?GB$")>;
+def : InstRW<[VecBF], (instregex "VC(L)?GD$")>;
+def : InstRW<[VecBF], (instregex "VC(L)?GDB$")>;
+def : InstRW<[VecBF], (instregex "WC(L)?GDB$")>;
+def : InstRW<[VecBF], (instregex "VL(DE|ED)$")>;
+def : InstRW<[VecBF], (instregex "VL(DE|ED)B$")>;
+def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>;
+def : InstRW<[VecBF], (instregex "VFL(L|R)$")>;
+def : InstRW<[VecBF], (instregex "VFL(LS|RD)$")>;
+def : InstRW<[VecBF], (instregex "WFL(LS|RD)$")>;
+def : InstRW<[VecBF2], (instregex "WFLLD$")>;
+def : InstRW<[VecDF2, Lat10], (instregex "WFLRX$")>;
+def : InstRW<[VecBF2], (instregex "VFI$")>;
+def : InstRW<[VecBF], (instregex "VFIDB$")>;
+def : InstRW<[VecBF], (instregex "WFIDB$")>;
+def : InstRW<[VecBF2], (instregex "VFISB$")>;
+def : InstRW<[VecBF], (instregex "WFISB$")>;
+def : InstRW<[VecDF2, Lat10], (instregex "WFIXB$")>;
+
+// Sign operations
+def : InstRW<[VecXsPm], (instregex "VFPSO$")>;
+def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>;
+def : InstRW<[VecXsPm], (instregex "(V|W)FPSOSB$")>;
+def : InstRW<[VecXsPm], (instregex "WFPSOXB$")>;
+def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>;
+def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)SB$")>;
+def : InstRW<[VecXsPm], (instregex "WFL(C|N|P)XB$")>;
+
+// Minimum / maximum
+def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)$")>;
+def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)DB$")>;
+def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)DB$")>;
+def : InstRW<[VecXsPm], (instregex "VF(MAX|MIN)SB$")>;
+def : InstRW<[VecXsPm], (instregex "WF(MAX|MIN)SB$")>;
+def : InstRW<[VecDFX], (instregex "WF(MAX|MIN)XB$")>;
+
+// Test data class
+def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCISB$")>;
+def : InstRW<[VecDFX, Lat4], (instregex "WFTCIXB$")>;
+
+// Add / subtract
+def : InstRW<[VecBF2], (instregex "VF(A|S)$")>;
+def : InstRW<[VecBF], (instregex "VF(A|S)DB$")>;
+def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>;
+def : InstRW<[VecBF2], (instregex "VF(A|S)SB$")>;
+def : InstRW<[VecBF], (instregex "WF(A|S)SB$")>;
+def : InstRW<[VecDF2, Lat10], (instregex "WF(A|S)XB$")>;
+
+// Multiply / multiply-and-add/subtract
+def : InstRW<[VecBF2], (instregex "VFM$")>;
+def : InstRW<[VecBF], (instregex "VFMDB$")>;
+def : InstRW<[VecBF], (instregex "WFMDB$")>;
+def : InstRW<[VecBF2], (instregex "VFMSB$")>;
+def : InstRW<[VecBF], (instregex "WFMSB$")>;
+def : InstRW<[VecDF2, Lat20], (instregex "WFMXB$")>;
+def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)$")>;
+def : InstRW<[VecBF], (instregex "VF(N)?M(A|S)DB$")>;
+def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)DB$")>;
+def : InstRW<[VecBF2], (instregex "VF(N)?M(A|S)SB$")>;
+def : InstRW<[VecBF], (instregex "WF(N)?M(A|S)SB$")>;
+def : InstRW<[VecDF2, Lat20], (instregex "WF(N)?M(A|S)XB$")>;
+
+// Divide / square root
+def : InstRW<[VecFPd], (instregex "VFD$")>;
+def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>;
+def : InstRW<[VecFPd], (instregex "(V|W)FDSB$")>;
+def : InstRW<[VecFPd], (instregex "WFDXB$")>;
+def : InstRW<[VecFPd], (instregex "VFSQ$")>;
+def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>;
+def : InstRW<[VecFPd], (instregex "(V|W)FSQSB$")>;
+def : InstRW<[VecFPd], (instregex "WFSQXB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)$")>;
+def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)DB$")>;
+def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)DB$")>;
+def : InstRW<[VecXsPm], (instregex "VF(C|K)(E|H|HE)SB$")>;
+def : InstRW<[VecXsPm], (instregex "WF(C|K)(E|H|HE)SB$")>;
+def : InstRW<[VecDFX], (instregex "WF(C|K)(E|H|HE)XB$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)DBS$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)DBS$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VF(C|K)(E|H|HE)SBS$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)(E|H|HE)SBS$")>;
+def : InstRW<[VecDFX, Lat4], (instregex "WF(C|K)(E|H|HE)XBS$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)SB$")>;
+def : InstRW<[VecDFX, Lat4], (instregex "WF(C|K)XB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point insertion and extraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "LEFR$")>;
+def : InstRW<[FXb, Lat4], (instregex "LFER$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: String instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecStr], (instregex "VFAE(B)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFAEBS$")>;
+def : InstRW<[VecStr], (instregex "VFAE(F|H)$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFAE(F|H)S$")>;
+def : InstRW<[VecStr], (instregex "VFAEZ(B|F|H)$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFAEZ(B|F|H)S$")>;
+def : InstRW<[VecStr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[VecStr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[VecStr], (instregex "VISTR(B|F|H)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VISTR(B|F|H)S$")>;
+def : InstRW<[VecStr], (instregex "VSTRC(B|F|H)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VSTRC(B|F|H)S$")>;
+def : InstRW<[VecStr], (instregex "VSTRCZ(B|F|H)$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Packed-decimal instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecDF, VecDF, Lat10, GroupAlone], (instregex "VLIP$")>;
+def : InstRW<[VecDFX, LSU, Lat12, GroupAlone], (instregex "VPKZ$")>;
+def : InstRW<[VecDFX, FXb, LSU, Lat12, GroupAlone], (instregex "VUPKZ$")>;
+def : InstRW<[VecDF, VecDF, FXb, Lat20, GroupAlone], (instregex "VCVB(G)?$")>;
+def : InstRW<[VecDF, VecDF, FXb, Lat20, GroupAlone], (instregex "VCVD(G)?$")>;
+def : InstRW<[VecDFX], (instregex "V(A|S)P$")>;
+def : InstRW<[VecDF, VecDF, Lat30, GroupAlone], (instregex "VM(S)?P$")>;
+def : InstRW<[VecDF, VecDF, Lat30, GroupAlone], (instregex "V(D|R)P$")>;
+def : InstRW<[VecDFX, Lat30, GroupAlone], (instregex "VSDP$")>;
+def : InstRW<[VecDF, VecDF, Lat11], (instregex "VSRP$")>;
+def : InstRW<[VecDFX], (instregex "VPSOP$")>;
+def : InstRW<[VecDFX], (instregex "V(T|C)P$")>;
+
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "EPSW$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LPSW(E)?$")>;
+def : InstRW<[FXa, Lat3, GroupAlone], (instregex "IPK$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SSM$")>;
+def : InstRW<[FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXa, Lat3], (instregex "IAC$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "LCTL(G)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[FXb, Lat30], (instregex "SSA(I)?R$")>;
+def : InstRW<[FXb, Lat30], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "SPX$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STPX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "ISKE$")>;
+def : InstRW<[FXb, Lat30], (instregex "IVSK$")>;
+def : InstRW<[FXb, Lat30], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "RRB(E|M)$")>;
+def : InstRW<[FXb, Lat30], (instregex "IRBM$")>;
+def : InstRW<[FXb, Lat30], (instregex "PFMF$")>;
+def : InstRW<[FXb, Lat30], (instregex "TB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PGIN$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "PTLB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "CSP(G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LPTEA$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STRAG$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "LURA(G)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STUR(A|G)$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "MVCOS$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "LASP$")>;
+def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "PC$")>;
+def : InstRW<[FXb, Lat30], (instregex "PR$")>;
+def : InstRW<[FXb, Lat30], (instregex "PT(I)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "RP$")>;
+def : InstRW<[FXb, Lat30], (instregex "BS(G|A)$")>;
+def : InstRW<[FXb, Lat20], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30, EndGroup], (instregex "BAKR$")>;
+def : InstRW<[FXb, Lat30], (instregex "EREG(G)?$")>;
+def : InstRW<[FXb, Lat30], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "PTFF$")>;
+def : InstRW<[FXb, LSU, Lat20], (instregex "SCK$")>;
+def : InstRW<[FXb, Lat30], (instregex "SCKPF$")>;
+def : InstRW<[FXb, LSU, Lat20], (instregex "SCKC$")>;
+def : InstRW<[LSU, LSU, GroupAlone], (instregex "SPT$")>;
+def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone],
+ (instregex "STCK(F)?$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone],
+ (instregex "STCKE$")>;
+def : InstRW<[FXb, LSU, Lat9], (instregex "STCKC$")>;
+def : InstRW<[LSU, LSU, FXb, Lat5, BeginGroup], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat30], (instregex "STAP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STIDP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STSI$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STFL(E)?$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "ECAG$")>;
+def : InstRW<[FXa, LSU, Lat30], (instregex "ECTG$")>;
+def : InstRW<[FXb, Lat30], (instregex "PTF$")>;
+def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "SVC$")>;
+def : InstRW<[FXb, GroupAlone], (instregex "MC$")>;
+def : InstRW<[FXb, Lat30], (instregex "DIAG$")>;
+def : InstRW<[FXb], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXb, Lat30], (instregex "TRAP(2|4)$")>;
+def : InstRW<[FXb, Lat30], (instregex "SIGP$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "SIGA$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "LPP$")>;
+def : InstRW<[FXb, Lat30], (instregex "ECPGA$")>;
+def : InstRW<[FXb, Lat30], (instregex "E(C|P)CTR$")>;
+def : InstRW<[FXb, Lat30], (instregex "LCCTL$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "L(P|S)CTL$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[FXb, Lat30], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, Lat30], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[FXb, Lat30], (instregex "RCHP$")>;
+def : InstRW<[FXb, Lat30], (instregex "SCHM$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "STC(PS|RW)$")>;
+def : InstRW<[FXb, LSU, Lat30], (instregex "TPI$")>;
+def : InstRW<[FXb, Lat30], (instregex "SAL$")>;
+
+}
+
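Note on the scheduler idiom used in this file and in the Z196/ZEC12 files that follow: processor pipelines are declared as ProcResource records, WriteRes records give each SchedWrite token a default latency on those pipelines, and InstRW entries then attach a list of such tokens to every opcode whose name matches an instregex pattern. The extra tokens seen throughout (Lat4, Lat8, Lat30, BeginGroup, EndGroup, GroupAlone) are additional SchedWrite types that raise the latency of an entry or tell the SystemZ hazard recognizer how the instruction may be placed in a decoder group. A minimal sketch of the idiom, using made-up names (SketchModel, SketchFXUnit, ...) rather than anything taken from this patch:

// SchedWrite tokens that the InstRW lists below refer to.
def SketchFX  : SchedWrite;
def SketchLSU : SchedWrite;

def SketchModel : SchedMachineModel {
  let IssueWidth = 2;                  // decoder/dispatch width of the sketch core
  let CompleteModel = 0;               // unmatched opcodes are simply left unscheduled
}

let SchedModel = SketchModel in {
  def SketchFXUnit : ProcResource<1>;  // one integer pipe
  def SketchLSUnit : ProcResource<1>;  // one load/store pipe

  def : WriteRes<SketchFX,  [SketchFXUnit]> { let Latency = 1; }
  def : WriteRes<SketchLSU, [SketchLSUnit]> { let Latency = 4; }

  // Register add forms (AR, ARK, AGR, AGRK) use only the integer pipe;
  // memory forms (A, AY, AG, AGF) also occupy the load/store pipe.
  def : InstRW<[SketchFX],            (instregex "A(G)?R(K)?$")>;
  def : InstRW<[SketchFX, SketchLSU], (instregex "A(Y|G|GF)?$")>;
}

Because the match is on opcode names, an entry such as (instregex "VAVG(B|F|G|H)?$") at the top of this hunk covers VAVG, VAVGB, VAVGF, VAVGG and VAVGH with a single line.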
diff --git a/lib/Target/SystemZ/SystemZScheduleZ196.td b/lib/Target/SystemZ/SystemZScheduleZ196.td
index e3e1999d8ad8..4d986e8391cf 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -311,7 +311,7 @@ def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
def : InstRW<[FXU], (instregex "ALR(K)?$")>;
def : InstRW<[FXU], (instregex "AR(K)?$")>;
def : InstRW<[FXU], (instregex "A(L)?HHHR$")>;
-def : InstRW<[FXU, FXU, Lat3], (instregex "A(L)?HHLR$")>;
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "A(L)?HHLR$")>;
def : InstRW<[FXU], (instregex "ALSIH(N)?$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>;
@@ -337,7 +337,7 @@ def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
def : InstRW<[FXU], (instregex "SLR(K)?$")>;
def : InstRW<[FXU], (instregex "SR(K)?$")>;
def : InstRW<[FXU], (instregex "S(L)?HHHR$")>;
-def : InstRW<[FXU, FXU, Lat3], (instregex "S(L)?HHLR$")>;
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "S(L)?HHLR$")>;
// Subtraction with borrow
def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>;
@@ -403,13 +403,13 @@ def : InstRW<[FXU, Lat6], (instregex "MS(R|FI)$")>;
def : InstRW<[FXU, LSU, Lat12], (instregex "MSG$")>;
def : InstRW<[FXU, Lat8], (instregex "MSGR$")>;
def : InstRW<[FXU, Lat6], (instregex "MSGF(I|R)$")>;
-def : InstRW<[FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
-def : InstRW<[FXU, Lat9, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[FXU, FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[FXU, FXU, Lat9, GroupAlone], (instregex "MLGR$")>;
def : InstRW<[FXU, Lat5], (instregex "MGHI$")>;
def : InstRW<[FXU, Lat5], (instregex "MHI$")>;
def : InstRW<[FXU, LSU, Lat9], (instregex "MH(Y)?$")>;
-def : InstRW<[FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>;
-def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
+def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[FXU, FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
//===----------------------------------------------------------------------===//
// Division and remainder
@@ -436,7 +436,8 @@ def : InstRW<[FXU], (instregex "SLL(G|K)?$")>;
def : InstRW<[FXU], (instregex "SRL(G|K)?$")>;
def : InstRW<[FXU], (instregex "SRA(G|K)?$")>;
def : InstRW<[FXU, Lat2], (instregex "SLA(G|K)?$")>;
-def : InstRW<[FXU, FXU, FXU, FXU, Lat8], (instregex "S(L|R)D(A|L)$")>;
+def : InstRW<[FXU, FXU, FXU, FXU, LSU, Lat8, GroupAlone],
+ (instregex "S(L|R)D(A|L)$")>;
// Rotate
def : InstRW<[FXU, LSU, Lat6], (instregex "RLL(G)?$")>;
@@ -474,7 +475,7 @@ def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>;
def : InstRW<[FXU], (instregex "CLR$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>;
def : InstRW<[FXU], (instregex "C(L)?HHR$")>;
-def : InstRW<[FXU, FXU, Lat3], (instregex "C(L)?HLR$")>;
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "C(L)?HLR$")>;
// Compare halfword
def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CH(Y|RL)?$")>;
@@ -499,7 +500,7 @@ def : InstRW<[FXU], (instregex "TMLH(64)?$")>;
def : InstRW<[FXU], (instregex "TMLL(64)?$")>;
// Compare logical characters under mask
-def : InstRW<[FXU, LSU, Lat5], (instregex "CLM(H|Y)?$")>;
+def : InstRW<[FXU, FXU, LSU, Lat5, GroupAlone], (instregex "CLM(H|Y)?$")>;
//===----------------------------------------------------------------------===//
// Prefetch
@@ -532,7 +533,7 @@ def : InstRW<[FXU, FXU, FXU, FXU, FXU, FXU, LSU, LSU, Lat12, GroupAlone],
(instregex "CDSG$")>;
// Compare and swap and store
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CSST$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "CSST$")>;
// Perform locked operation
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>;
@@ -548,36 +549,44 @@ def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>;
// Translate and convert
//===----------------------------------------------------------------------===//
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TRT$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TRTR$")>;
+def : InstRW<[FXU, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
//===----------------------------------------------------------------------===//
// Message-security assist
//===----------------------------------------------------------------------===//
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC)$")>;
+def : InstRW<[FXU, Lat30], (instregex "KM(C|F|O|CTR)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(KIMD|KLMD|KMAC|PCC)$")>;
//===----------------------------------------------------------------------===//
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>;
-def : InstRW<[FXU, DFU, FXU, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>;
+def : InstRW<[FXU, DFU2, LSU, LSU, Lat30, GroupAlone], (instregex "CVBG$")>;
+def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>;
+def : InstRW<[FXU, FXU, FXU, DFU2, DFU2, LSU, Lat30, GroupAlone],
+ (instregex "CVDG$")>;
+def : InstRW<[FXU, FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK$")>;
+def : InstRW<[LSU, Lat12, GroupAlone], (instregex "UNPK(A|U)$")>;
-def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat15, GroupAlone],
+def : InstRW<[FXU, DFU2, DFU2, LSU, LSU, Lat15, GroupAlone],
(instregex "(A|S|ZA)P$")>;
-def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat30, GroupAlone],
+def : InstRW<[FXU, DFU2, DFU2, LSU, LSU, Lat30, GroupAlone],
(instregex "(M|D)P$")>;
-def : InstRW<[FXU, FXU, DFU2, LSU, LSU, Lat15, GroupAlone],
+def : InstRW<[FXU, FXU, DFU2, DFU2, LSU, LSU, LSU, Lat15, GroupAlone],
(instregex "SRP$")>;
-def : InstRW<[DFU2, LSU, LSU, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>;
-def : InstRW<[DFU2, LSU, LSU, Lat3, GroupAlone], (instregex "TP$")>;
+def : InstRW<[DFU2, DFU2, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>;
+def : InstRW<[DFU2, LSU, LSU, GroupAlone], (instregex "TP$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>;
//===----------------------------------------------------------------------===//
@@ -621,7 +630,7 @@ def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "BASSM$")>;
//===----------------------------------------------------------------------===//
// Find leftmost one
-def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
+def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
// Population count
def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>;
@@ -632,14 +641,14 @@ def : InstRW<[FXU], (instregex "ZEXT128$")>;
// String instructions
def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>;
-def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>;
+def : InstRW<[FXU, Lat30], (instregex "SRSTU$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>;
// Various complex instructions
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
+def : InstRW<[LSU, Lat30], (instregex "CFC$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "UPT$")>;
+def : InstRW<[LSU, Lat30], (instregex "CKSM$")>;
+def : InstRW<[FXU, Lat30], (instregex "CMPSC$")>;
// Execute
def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>;
@@ -780,9 +789,9 @@ def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDBR$")>;
def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXBR$")>;
// Multiply and add / subtract
-def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)EBR$")>;
-def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DBR$")>;
// Division
@@ -791,7 +800,7 @@ def : InstRW<[FPU, Lat30], (instregex "D(E|D)BR$")>;
def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXBR$")>;
// Divide to integer
-def : InstRW<[FPU, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>;
+def : InstRW<[FPU, Lat30], (instregex "DI(E|D)BR$")>;
//===----------------------------------------------------------------------===//
// FP: Comparisons
@@ -813,9 +822,9 @@ def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "TCXB$")>;
def : InstRW<[FXU, LSU, Lat4, GroupAlone], (instregex "EFPC$")>;
def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>;
def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>;
-def : InstRW<[LSU, Lat3, GroupAlone], (instregex "STFPC$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>;
-def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
+def : InstRW<[FXU, LSU, Lat3, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[FXU, Lat30], (instregex "SFASR$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LFAS$")>;
def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>;
@@ -900,16 +909,20 @@ def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>;
def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>;
def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>;
def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>;
-def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY(H|L)?$")>;
-def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MY(H|L)?R$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY$")>;
+def : InstRW<[FPU, FPU, LSU, Lat15, GroupAlone], (instregex "MY(H|L)$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MYR$")>;
+def : InstRW<[FPU, Lat10, GroupAlone], (instregex "MY(H|L)R$")>;
// Multiply and add / subtract
-def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>;
-def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>;
-def : InstRW<[FPU2, FPU2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>;
-def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAY(H|L)?R$")>;
+def : InstRW<[FPU2, FPU2, LSU, GroupAlone], (instregex "MAY$")>;
+def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAYR$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "MAY(H|L)R$")>;
// Division
def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>;
@@ -949,16 +962,21 @@ def : InstRW<[DFU, Lat20], (instregex "LDETR$")>;
def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
-def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CD(F|G)TR(A)?$")>;
-def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat9, GroupAlone], (instregex "CDFTR$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CDGTR(A)?$")>;
+def : InstRW<[FXU, DFU2, DFU2, GroupAlone], (instregex "CXFTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CXGTR(A)?$")>;
def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXL(F|G)TR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXLFTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat6, GroupAlone], (instregex "CXLGTR$")>;
// Convert to fixed / logical
-def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "C(F|G)DTR(A)?$")>;
-def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "C(F|G)XTR(A)?$")>;
-def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)DTR$")>;
-def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)XTR$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CFDTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CGDTR(A)?$")>;
+def : InstRW<[FXU, DFU, DFU, Lat11, GroupAlone], (instregex "CFXTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CGXTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat11, GroupAlone], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>;
@@ -967,7 +985,7 @@ def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "C(S|U)DTR$")>;
def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")>;
// Perform floating-point operation
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>;
+def : InstRW<[FXU, Lat30], (instregex "PFPO$")>;
//===----------------------------------------------------------------------===//
// DFP: Unary arithmetic
@@ -979,7 +997,7 @@ def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>;
// Extract biased exponent
def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>;
-def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEXTR$")>;
+def : InstRW<[FXU, DFU2, Lat15, GroupAlone], (instregex "EEXTR$")>;
// Extract significance
def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>;
@@ -1010,15 +1028,15 @@ def : InstRW<[DFU, Lat30], (instregex "QADTR$")>;
def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>;
// Reround
-def : InstRW<[FXU, DFU, Lat30], (instregex "RRDTR$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "RRDTR$")>;
def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>;
// Shift significand left/right
-def : InstRW<[LSU, DFU, Lat11], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, DFU, Lat11, GroupAlone], (instregex "S(L|R)DT$")>;
def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
-def : InstRW<[FXU, DFU, Lat11], (instregex "IEDTR$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "IEDTR$")>;
def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
@@ -1027,15 +1045,15 @@ def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>;
// Compare
def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>;
-def : InstRW<[DFU, DFU, Lat15, GroupAlone], (instregex "(K|C)XTR$")>;
+def : InstRW<[DFU, DFU, Lat15], (instregex "(K|C)XTR$")>;
// Compare biased exponent
def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>;
-def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>;
+def : InstRW<[DFU2, Lat9], (instregex "CEXTR$")>;
// Test Data Class/Group
def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
-def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+def : InstRW<[LSU, DFU2, Lat15], (instregex "TD(C|G)XT$")>;
// -------------------------------- System ---------------------------------- //
@@ -1046,19 +1064,20 @@ def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>;
-def : InstRW<[FXU, Lat3], (instregex "IPK$")>;
-def : InstRW<[LSU], (instregex "SPKA$")>;
-def : InstRW<[LSU], (instregex "SSM$")>;
-def : InstRW<[FXU], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXU, Lat3, GroupAlone], (instregex "IPK$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SSM$")>;
+def : InstRW<[FXU, LSU, GroupAlone], (instregex "ST(N|O)SM$")>;
def : InstRW<[FXU, Lat3], (instregex "IAC$")>;
-def : InstRW<[LSU], (instregex "SAC(F)?$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>;
//===----------------------------------------------------------------------===//
// System: Control Register Instructions
//===----------------------------------------------------------------------===//
def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>;
-def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>;
+def : InstRW<[FXU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
+ (instregex "STCT(L|G)$")>;
def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>;
def : InstRW<[FXU, Lat30], (instregex "ESEA$")>;
@@ -1103,16 +1122,17 @@ def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
-def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCSK$")>;
+def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVCDK$")>;
def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>;
-def : InstRW<[LSU, Lat30], (instregex "MVPG$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>;
//===----------------------------------------------------------------------===//
// System: Address-Space Instructions
//===----------------------------------------------------------------------===//
def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>;
-def : InstRW<[LSU], (instregex "PALB$")>;
+def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>;
def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>;
def : InstRW<[FXU, Lat30], (instregex "PR$")>;
def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>;
@@ -1124,7 +1144,7 @@ def : InstRW<[FXU, Lat20], (instregex "TAR$")>;
// System: Linkage-Stack Instructions
//===----------------------------------------------------------------------===//
-def : InstRW<[FXU, LSU, Lat30], (instregex "BAKR$")>;
+def : InstRW<[FXU, LSU, Lat30, EndGroup], (instregex "BAKR$")>;
def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>;
def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>;
@@ -1161,9 +1181,9 @@ def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
-def : InstRW<[FXU], (instregex "MC$")>;
+def : InstRW<[FXU, GroupAlone], (instregex "MC$")>;
def : InstRW<[FXU, Lat30], (instregex "DIAG$")>;
-def : InstRW<[FXU], (instregex "TRAC(E|G)$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TRAC(E|G)$")>;
def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>;
def : InstRW<[FXU, Lat30], (instregex "SIGP$")>;
def : InstRW<[FXU, LSU, Lat30], (instregex "SIGA$")>;
@@ -1176,7 +1196,8 @@ def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>;
def : InstRW<[FXU], (instregex "LPP$")>;
def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>;
def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>;
-def : InstRW<[FXU, LSU, Lat30], (instregex "L(C|P|S)CTL$")>;
+def : InstRW<[FXU, Lat30], (instregex "LCCTL$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "L(P|S)CTL$")>;
def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>;
diff --git a/lib/Target/SystemZ/SystemZScheduleZEC12.td b/lib/Target/SystemZ/SystemZScheduleZEC12.td
index 59f37205f412..a0f2115eb9d7 100644
--- a/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -69,7 +69,7 @@ def : WriteRes<LSU_lat1, [ZEC12_LSUnit]> { let Latency = 1; }
def : WriteRes<FPU, [ZEC12_FPUnit]> { let Latency = 8; }
def : WriteRes<FPU2, [ZEC12_FPUnit, ZEC12_FPUnit]> { let Latency = 9; }
def : WriteRes<DFU, [ZEC12_DFUnit]> { let Latency = 2; }
-def : WriteRes<DFU2, [ZEC12_DFUnit, ZEC12_FPUnit]> { let Latency = 3; }
+def : WriteRes<DFU2, [ZEC12_DFUnit, ZEC12_DFUnit]> { let Latency = 3; }
def : WriteRes<VBU, [ZEC12_VBUnit]>; // Virtual Branching Unit
// -------------------------- INSTRUCTIONS ---------------------------------- //
@@ -251,7 +251,7 @@ def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
(instregex "LM(H|Y|G)?$")>;
// Load multiple disjoint
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "LMD$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "LMD$")>;
// Store multiple (estimated average of 3 ops)
def : InstRW<[LSU, LSU, FXU, FXU, FXU, Lat10, GroupAlone],
@@ -413,13 +413,13 @@ def : InstRW<[FXU, Lat6], (instregex "MS(R|FI)$")>;
def : InstRW<[FXU, LSU, Lat12], (instregex "MSG$")>;
def : InstRW<[FXU, Lat8], (instregex "MSGR$")>;
def : InstRW<[FXU, Lat6], (instregex "MSGF(I|R)$")>;
-def : InstRW<[FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
-def : InstRW<[FXU, Lat9, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[FXU, FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[FXU, FXU, Lat9, GroupAlone], (instregex "MLGR$")>;
def : InstRW<[FXU, Lat5], (instregex "MGHI$")>;
def : InstRW<[FXU, Lat5], (instregex "MHI$")>;
def : InstRW<[FXU, LSU, Lat9], (instregex "MH(Y)?$")>;
-def : InstRW<[FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>;
-def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
+def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[FXU, FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>;
//===----------------------------------------------------------------------===//
// Division and remainder
@@ -446,7 +446,8 @@ def : InstRW<[FXU], (instregex "SLL(G|K)?$")>;
def : InstRW<[FXU], (instregex "SRL(G|K)?$")>;
def : InstRW<[FXU], (instregex "SRA(G|K)?$")>;
def : InstRW<[FXU], (instregex "SLA(G|K)?$")>;
-def : InstRW<[FXU, FXU, FXU, FXU, Lat8], (instregex "S(L|R)D(A|L)$")>;
+def : InstRW<[FXU, FXU, FXU, FXU, LSU, Lat8, GroupAlone],
+ (instregex "S(L|R)D(A|L)$")>;
// Rotate
def : InstRW<[FXU, LSU, Lat6], (instregex "RLL(G)?$")>;
@@ -544,7 +545,7 @@ def : InstRW<[FXU, FXU, FXU, FXU, FXU, FXU, LSU, LSU, Lat12, GroupAlone],
(instregex "CDSG$")>;
// Compare and swap and store
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CSST$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "CSST$")>;
// Perform locked operation
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>;
@@ -560,36 +561,44 @@ def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>;
// Translate and convert
//===----------------------------------------------------------------------===//
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>;
+def : InstRW<[FXU, FXU, FXU, LSU, LSU, Lat30, GroupAlone], (instregex "TRT$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "TRTR$")>;
+def : InstRW<[FXU, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>;
+def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
//===----------------------------------------------------------------------===//
// Message-security assist
//===----------------------------------------------------------------------===//
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC)$")>;
+def : InstRW<[FXU, Lat30], (instregex "KM(C|F|O|CTR)?$")>;
+def : InstRW<[FXU, Lat30], (instregex "(KIMD|KLMD|KMAC|PCC)$")>;
//===----------------------------------------------------------------------===//
// Decimal arithmetic
//===----------------------------------------------------------------------===//
-def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>;
-def : InstRW<[FXU, DFU, FXU, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>;
+def : InstRW<[FXU, DFU2, LSU, LSU, Lat30, GroupAlone], (instregex "CVBG$")>;
+def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>;
+def : InstRW<[FXU, FXU, FXU, DFU2, DFU2, LSU, Lat30, GroupAlone],
+ (instregex "CVDG$")>;
+def : InstRW<[FXU, FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>;
+def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK$")>;
+def : InstRW<[LSU, Lat12, GroupAlone], (instregex "UNPK(A|U)$")>;
-def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat15, GroupAlone],
+def : InstRW<[FXU, DFU2, DFU2, LSU, LSU, Lat15, GroupAlone],
(instregex "(A|S|ZA)P$")>;
-def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat30, GroupAlone],
+def : InstRW<[FXU, DFU2, DFU2, LSU, LSU, Lat30, GroupAlone],
(instregex "(M|D)P$")>;
-def : InstRW<[FXU, FXU, DFU2, LSU, LSU, Lat15, GroupAlone],
+def : InstRW<[FXU, FXU, DFU2, DFU2, LSU, LSU, LSU, Lat15, GroupAlone],
(instregex "SRP$")>;
-def : InstRW<[DFU2, LSU, LSU, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>;
-def : InstRW<[DFU2, LSU, LSU, Lat3, GroupAlone], (instregex "TP$")>;
+def : InstRW<[DFU2, DFU2, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>;
+def : InstRW<[DFU2, LSU, LSU, Lat5, GroupAlone], (instregex "TP$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>;
//===----------------------------------------------------------------------===//
@@ -659,7 +668,7 @@ def : InstRW<[FXU], (instregex "PPA$")>;
//===----------------------------------------------------------------------===//
// Find leftmost one
-def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
+def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
// Population count
def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>;
@@ -670,14 +679,14 @@ def : InstRW<[FXU], (instregex "ZEXT128$")>;
// String instructions
def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>;
-def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>;
+def : InstRW<[FXU, Lat30], (instregex "SRSTU$")>;
def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>;
// Various complex instructions
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>;
+def : InstRW<[LSU, Lat30], (instregex "CFC$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "UPT$")>;
+def : InstRW<[LSU, Lat30], (instregex "CKSM$")>;
+def : InstRW<[FXU, Lat30], (instregex "CMPSC$")>;
// Execute
def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>;
@@ -818,9 +827,9 @@ def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDBR$")>;
def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXBR$")>;
// Multiply and add / subtract
-def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)EBR$")>;
-def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DBR$")>;
// Division
@@ -829,7 +838,7 @@ def : InstRW<[FPU, Lat30], (instregex "D(E|D)BR$")>;
def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXBR$")>;
// Divide to integer
-def : InstRW<[FPU, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>;
+def : InstRW<[FPU, Lat30], (instregex "DI(E|D)BR$")>;
//===----------------------------------------------------------------------===//
// FP: Comparisons
@@ -851,10 +860,10 @@ def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "TCXB$")>;
def : InstRW<[FXU, LSU, Lat4, GroupAlone], (instregex "EFPC$")>;
def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>;
def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>;
-def : InstRW<[LSU, Lat3, GroupAlone], (instregex "STFPC$")>;
-def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>;
-def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
-def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>;
+def : InstRW<[FXU, LSU, Lat3, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[FXU, Lat30], (instregex "SFASR$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "LFAS$")>;
+def : InstRW<[FXU, GroupAlone], (instregex "SRNM(B|T)?$")>;
// --------------------- Hexadecimal floating point ------------------------- //
@@ -938,16 +947,20 @@ def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>;
def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>;
def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>;
def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>;
-def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY(H|L)?$")>;
-def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MY(H|L)?R$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY$")>;
+def : InstRW<[FPU, FPU, LSU, Lat15, GroupAlone], (instregex "MY(H|L)$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MYR$")>;
+def : InstRW<[FPU, Lat10, GroupAlone], (instregex "MY(H|L)R$")>;
// Multiply and add / subtract
-def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>;
-def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>;
-def : InstRW<[FPU2, FPU2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>;
-def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAY(H|L)?R$")>;
+def : InstRW<[FPU2, FPU2, LSU, GroupAlone], (instregex "MAY$")>;
+def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAYR$")>;
+def : InstRW<[FPU, FPU, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "MAY(H|L)R$")>;
// Division
def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>;
@@ -987,16 +1000,21 @@ def : InstRW<[DFU, Lat20], (instregex "LDETR$")>;
def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>;
// Convert from fixed / logical
-def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CD(F|G)TR(A)?$")>;
-def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat9, GroupAlone], (instregex "CDFTR$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CDGTR(A)?$")>;
+def : InstRW<[FXU, DFU2, DFU2, GroupAlone], (instregex "CXFTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CXGTR(A)?$")>;
def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>;
-def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXL(F|G)TR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXLFTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat6, GroupAlone], (instregex "CXLGTR$")>;
// Convert to fixed / logical
-def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "C(F|G)DTR(A)?$")>;
-def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "C(F|G)XTR(A)?$")>;
-def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)DTR$")>;
-def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)XTR$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CFDTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CGDTR(A)?$")>;
+def : InstRW<[FXU, DFU, DFU, Lat11, GroupAlone], (instregex "CFXTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CGXTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat11, GroupAlone], (instregex "CL(F|G)XTR$")>;
// Convert from / to signed / unsigned packed
def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>;
@@ -1007,11 +1025,11 @@ def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")
// Convert from / to zoned
def : InstRW<[LSU, DFU2, Lat7, GroupAlone], (instregex "CDZT$")>;
def : InstRW<[LSU, LSU, DFU2, DFU2, Lat10, GroupAlone], (instregex "CXZT$")>;
-def : InstRW<[FXU, LSU, DFU, Lat11, GroupAlone], (instregex "CZDT$")>;
+def : InstRW<[FXU, LSU, DFU, DFU, Lat11, GroupAlone], (instregex "CZDT$")>;
def : InstRW<[FXU, LSU, DFU, DFU, Lat15, GroupAlone], (instregex "CZXT$")>;
// Perform floating-point operation
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>;
+def : InstRW<[FXU, Lat30], (instregex "PFPO$")>;
//===----------------------------------------------------------------------===//
// DFP: Unary arithmetic
@@ -1023,7 +1041,7 @@ def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>;
// Extract biased exponent
def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>;
-def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEXTR$")>;
+def : InstRW<[FXU, DFU2, Lat15, GroupAlone], (instregex "EEXTR$")>;
// Extract significance
def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>;
@@ -1054,15 +1072,15 @@ def : InstRW<[DFU, Lat30], (instregex "QADTR$")>;
def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>;
// Reround
-def : InstRW<[FXU, DFU, Lat30], (instregex "RRDTR$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "RRDTR$")>;
def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>;
// Shift significand left/right
-def : InstRW<[LSU, DFU, Lat11], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, DFU, Lat11, GroupAlone], (instregex "S(L|R)DT$")>;
def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
// Insert biased exponent
-def : InstRW<[FXU, DFU, Lat11], (instregex "IEDTR$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "IEDTR$")>;
def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>;
//===----------------------------------------------------------------------===//
@@ -1071,15 +1089,15 @@ def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>;
// Compare
def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>;
-def : InstRW<[DFU, DFU, Lat15, GroupAlone], (instregex "(K|C)XTR$")>;
+def : InstRW<[DFU, DFU, Lat15], (instregex "(K|C)XTR$")>;
// Compare biased exponent
def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>;
-def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>;
+def : InstRW<[DFU, DFU, Lat9], (instregex "CEXTR$")>;
// Test Data Class/Group
def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
-def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+def : InstRW<[LSU, DFU2, Lat15], (instregex "TD(C|G)XT$")>;
// -------------------------------- System ---------------------------------- //
@@ -1090,19 +1108,20 @@ def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
def : InstRW<[FXU, Lat30], (instregex "EPSW$")>;
def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>;
-def : InstRW<[FXU, Lat3], (instregex "IPK$")>;
-def : InstRW<[LSU], (instregex "SPKA$")>;
-def : InstRW<[LSU], (instregex "SSM$")>;
-def : InstRW<[FXU], (instregex "ST(N|O)SM$")>;
+def : InstRW<[FXU, Lat3, GroupAlone], (instregex "IPK$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SSM$")>;
+def : InstRW<[FXU, LSU, GroupAlone], (instregex "ST(N|O)SM$")>;
def : InstRW<[FXU, Lat3], (instregex "IAC$")>;
-def : InstRW<[LSU], (instregex "SAC(F)?$")>;
+def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>;
//===----------------------------------------------------------------------===//
// System: Control Register Instructions
//===----------------------------------------------------------------------===//
def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>;
-def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>;
+def : InstRW<[FXU, LSU, LSU, LSU, LSU, Lat30, GroupAlone],
+ (instregex "STCT(L|G)$")>;
def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>;
def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>;
def : InstRW<[FXU, Lat30], (instregex "ESEA$")>;
@@ -1148,16 +1167,17 @@ def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>;
-def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>;
+def : InstRW<[LSU, Lat6, Lat30, GroupAlone], (instregex "MVCSK$")>;
+def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVCDK$")>;
def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>;
-def : InstRW<[LSU, Lat30], (instregex "MVPG$")>;
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>;
//===----------------------------------------------------------------------===//
// System: Address-Space Instructions
//===----------------------------------------------------------------------===//
def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>;
-def : InstRW<[LSU], (instregex "PALB$")>;
+def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>;
def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>;
def : InstRW<[FXU, Lat30], (instregex "PR$")>;
def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>;
@@ -1169,7 +1189,7 @@ def : InstRW<[FXU, Lat20], (instregex "TAR$")>;
// System: Linkage-Stack Instructions
//===----------------------------------------------------------------------===//
-def : InstRW<[FXU, LSU, Lat30], (instregex "BAKR$")>;
+def : InstRW<[FXU, LSU, Lat30, EndGroup], (instregex "BAKR$")>;
def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>;
def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>;
@@ -1206,7 +1226,7 @@ def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
-def : InstRW<[FXU], (instregex "MC$")>;
+def : InstRW<[FXU, GroupAlone], (instregex "MC$")>;
def : InstRW<[FXU, Lat30], (instregex "DIAG$")>;
def : InstRW<[FXU], (instregex "TRAC(E|G)$")>;
def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>;
@@ -1221,7 +1241,8 @@ def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>;
def : InstRW<[FXU], (instregex "LPP$")>;
def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>;
def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>;
-def : InstRW<[FXU, LSU, Lat30], (instregex "L(C|P|S)CTL$")>;
+def : InstRW<[FXU, Lat30], (instregex "LCCTL$")>;
+def : InstRW<[FXU, LSU, Lat30], (instregex "L(P|S)CTL$")>;
def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>;
def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>;
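Most of the Z196 and ZEC12 hunks above do not add new entries; they widen existing ones, either by repeating a write type so that an entry accounts for one more micro-op on that unit, or by adding GroupAlone/EndGroup so that the decoder-group constraint matches the hardware. FLOGR is a representative case, shown here with the replaced line kept as a comment; the reading of the tokens is how these scheduler files are generally understood, not something stated in the patch itself:

// Replaced:  def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
// The second FXU write below charges one more micro-op to the fixed-point
// unit; GroupAlone (already present) keeps FLOGR alone in its decoder group.
def : InstRW<[FXU, FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;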
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index 7391df8342ef..13ceb371a425 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -200,14 +200,26 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
break;
+ case SystemZ::WFASB:
+ Changed |= shortenOn001AddCC(MI, SystemZ::AEBR);
+ break;
+
case SystemZ::WFDDB:
Changed |= shortenOn001(MI, SystemZ::DDBR);
break;
+ case SystemZ::WFDSB:
+ Changed |= shortenOn001(MI, SystemZ::DEBR);
+ break;
+
case SystemZ::WFIDB:
Changed |= shortenFPConv(MI, SystemZ::FIDBRA);
break;
+ case SystemZ::WFISB:
+ Changed |= shortenFPConv(MI, SystemZ::FIEBRA);
+ break;
+
case SystemZ::WLDEB:
Changed |= shortenOn01(MI, SystemZ::LDEBR);
break;
@@ -220,30 +232,58 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
Changed |= shortenOn001(MI, SystemZ::MDBR);
break;
+ case SystemZ::WFMSB:
+ Changed |= shortenOn001(MI, SystemZ::MEEBR);
+ break;
+
case SystemZ::WFLCDB:
Changed |= shortenOn01(MI, SystemZ::LCDFR);
break;
+ case SystemZ::WFLCSB:
+ Changed |= shortenOn01(MI, SystemZ::LCDFR_32);
+ break;
+
case SystemZ::WFLNDB:
Changed |= shortenOn01(MI, SystemZ::LNDFR);
break;
+ case SystemZ::WFLNSB:
+ Changed |= shortenOn01(MI, SystemZ::LNDFR_32);
+ break;
+
case SystemZ::WFLPDB:
Changed |= shortenOn01(MI, SystemZ::LPDFR);
break;
+ case SystemZ::WFLPSB:
+ Changed |= shortenOn01(MI, SystemZ::LPDFR_32);
+ break;
+
case SystemZ::WFSQDB:
Changed |= shortenOn01(MI, SystemZ::SQDBR);
break;
+ case SystemZ::WFSQSB:
+ Changed |= shortenOn01(MI, SystemZ::SQEBR);
+ break;
+
case SystemZ::WFSDB:
Changed |= shortenOn001AddCC(MI, SystemZ::SDBR);
break;
+ case SystemZ::WFSSB:
+ Changed |= shortenOn001AddCC(MI, SystemZ::SEBR);
+ break;
+
case SystemZ::WFCDB:
Changed |= shortenOn01(MI, SystemZ::CDBR);
break;
+ case SystemZ::WFCSB:
+ Changed |= shortenOn01(MI, SystemZ::CEBR);
+ break;
+
case SystemZ::VL32:
// For z13 we prefer LDE over LE to avoid partial register dependencies.
Changed |= shortenOn0(MI, SystemZ::LDE32);
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index eb4a0962f7eb..9cd09b0f911e 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -47,6 +47,10 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasVector(false), HasLoadStoreOnCond2(false),
HasLoadAndZeroRightmostByte(false), HasMessageSecurityAssist5(false),
HasDFPPackedConversion(false),
+ HasMiscellaneousExtensions2(false), HasGuardedStorage(false),
+ HasMessageSecurityAssist7(false), HasMessageSecurityAssist8(false),
+ HasVectorEnhancements1(false), HasVectorPackedDecimal(false),
+ HasInsertReferenceBitsMultiple(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(), FrameLowering() {}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index b05a1bb6cafd..4829f73e080e 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -56,6 +56,13 @@ protected:
bool HasLoadAndZeroRightmostByte;
bool HasMessageSecurityAssist5;
bool HasDFPPackedConversion;
+ bool HasMiscellaneousExtensions2;
+ bool HasGuardedStorage;
+ bool HasMessageSecurityAssist7;
+ bool HasMessageSecurityAssist8;
+ bool HasVectorEnhancements1;
+ bool HasVectorPackedDecimal;
+ bool HasInsertReferenceBitsMultiple;
private:
Triple TargetTriple;
@@ -168,6 +175,33 @@ public:
// Return true if the target has the vector facility.
bool hasVector() const { return HasVector; }
+ // Return true if the target has the miscellaneous-extensions facility 2.
+ bool hasMiscellaneousExtensions2() const {
+ return HasMiscellaneousExtensions2;
+ }
+
+ // Return true if the target has the guarded-storage facility.
+ bool hasGuardedStorage() const { return HasGuardedStorage; }
+
+ // Return true if the target has the message-security-assist
+ // extension facility 7.
+ bool hasMessageSecurityAssist7() const { return HasMessageSecurityAssist7; }
+
+ // Return true if the target has the message-security-assist
+ // extension facility 8.
+ bool hasMessageSecurityAssist8() const { return HasMessageSecurityAssist8; }
+
+ // Return true if the target has the vector-enhancements facility 1.
+ bool hasVectorEnhancements1() const { return HasVectorEnhancements1; }
+
+ // Return true if the target has the vector-packed-decimal facility.
+ bool hasVectorPackedDecimal() const { return HasVectorPackedDecimal; }
+
+ // Return true if the target has the insert-reference-bits-multiple facility.
+ bool hasInsertReferenceBitsMultiple() const {
+ return HasInsertReferenceBitsMultiple;
+ }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const;
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index cb81c0e5276e..025bf73d2df0 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -143,8 +143,10 @@ public:
} // end anonymous namespace
void SystemZPassConfig::addIRPasses() {
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None) {
addPass(createSystemZTDCPass());
+ addPass(createLoopDataPrefetchPass());
+ }
TargetPassConfig::addIRPasses();
}
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 9ac768b2189d..506dc7427993 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -372,6 +372,9 @@ int SystemZTTIImpl::getArithmeticInstrCost(
Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
switch (ScalarBits) {
case 32: {
+ // The vector enhancements facility 1 provides v4f32 instructions.
+ if (ST->hasVectorEnhancements1())
+ return NumVectors;
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 6923fc6fc910..a0c6fa94f8c1 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -56,6 +56,10 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector) const;
+ unsigned getCacheLineSize() { return 256; }
+ unsigned getPrefetchDistance() { return 2000; }
+ unsigned getMinPrefetchStride() { return 2048; }
+
bool prefersVectorizedAddressing() { return false; }
bool supportsEfficientVectorElementLoadStore() { return true; }
bool enableInterleavedAccessVectorization() { return true; }
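
These three hooks feed the loop-data-prefetch pass that SystemZPassConfig::addIRPasses now schedules above: the pass queries TTI for the cache-line size, how far ahead to prefetch, and the minimum stride worth prefetching. A minimal sketch of that style of gating is shown below; shouldPrefetch and its parameters are illustrative names, not the actual LoopDataPrefetch implementation.

```c++
#include "llvm/Analysis/TargetTransformInfo.h"
#include <algorithm>
using namespace llvm;

// Illustrative only: how a prefetch pass might consult the TTI hooks above.
static bool shouldPrefetch(const TargetTransformInfo &TTI,
                           unsigned StrideBytes, unsigned LoopSizeInsts) {
  // Skip accesses whose stride is below the target's minimum profitable
  // stride (2048 bytes for SystemZ above).
  if (StrideBytes < TTI.getMinPrefetchStride())
    return false;
  // Prefetch far enough ahead to cover the target's prefetch distance
  // (2000 for SystemZ above), measured against the loop body size.
  unsigned ItersAhead = TTI.getPrefetchDistance() / std::max(1u, LoopSizeInsts);
  return ItersAhead > 0;
}
```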
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index fc4adddc149b..6e08d4cff6ea 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -37,6 +37,7 @@ endif()
set(sources
X86AsmPrinter.cpp
X86CallFrameOptimization.cpp
+ X86CmovConversion.cpp
X86ExpandPseudo.cpp
X86FastISel.cpp
X86FixupBWInsts.cpp
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 19c93cfff0fe..91201d1fec85 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -83,6 +83,9 @@ FunctionPass *createX86WinEHStatePass();
/// the MachineInstr to MC.
FunctionPass *createX86ExpandPseudoPass();
+/// This pass converts X86 cmov instructions into branches when profitable.
+FunctionPass *createX86CmovConverterPass();
+
/// Return a Machine IR pass that selectively replaces
/// certain byte and word instructions by equivalent 32 bit instructions,
/// in order to eliminate partial register usage, false dependences on
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 4ca57fe9fb00..54eabeac5126 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -814,10 +814,8 @@ def : Proc<"bdver4", [
FeatureMWAITX
]>;
-// TODO: The scheduler model falls to BTVER2 model.
-// The znver1 model has to be put in place.
-// Zen
-def: ProcessorModel<"znver1", BtVer2Model, [
+// Znver1
+def: ProcessorModel<"znver1", Znver1Model, [
FeatureADX,
FeatureAES,
FeatureAVX2,
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 6decb550ad5f..26461986427d 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -448,7 +448,7 @@ def RetCC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::Swift", CCDelegateTo<RetCC_X86_64_Swift>>,
// Handle explicit CC selection
- CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<RetCC_X86_Win64_C>>,
+ CCIfCC<"CallingConv::Win64", CCDelegateTo<RetCC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<RetCC_X86_64_C>>,
// Handle Vectorcall CC
@@ -1004,7 +1004,7 @@ def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_64_HiPE>>,
CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo<CC_X86_64_WebKit_JS>>,
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_X86_64_AnyReg>>,
- CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<CC_X86_Win64_C>>,
+ CCIfCC<"CallingConv::Win64", CCDelegateTo<CC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
CCIfCC<"CallingConv::HHVM", CCDelegateTo<CC_X86_64_HHVM>>,
diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp
new file mode 100644
index 000000000000..bfc834435de5
--- /dev/null
+++ b/lib/Target/X86/X86CmovConversion.cpp
@@ -0,0 +1,611 @@
+//====-- X86CmovConversion.cpp - Convert Cmov to Branch -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a pass that converts X86 cmov instructions into
+/// branches when profitable. This pass is conservative, i.e., it applies the
+/// transformation if and only if it can guarantee a gain with high confidence.
+///
+/// Thus, the optimization applies under the following conditions:
+/// 1. Consider as candidates only CMOVs in innermost loops, assuming that
+/// most hotspots are represented by these loops.
+/// 2. Given a group of CMOV instructions that use the same EFLAGS def
+/// instruction:
+/// a. Consider them as candidates only if all have the same condition code
+/// or its opposite, to prevent generating more than one conditional jump
+/// per EFLAGS def instruction.
+/// b. Consider them as candidates only if all are profitable to be
+/// converted, assuming that one bad conversion may cause a degradation.
+/// 3. Apply the conversion only to loops that are found profitable and only
+/// to CMOV candidates that were found profitable.
+/// a. A loop is considered profitable only if the conversion will reduce
+/// its depth cost by some threshold.
+/// b. A CMOV is considered profitable if the cost of its condition is higher
+/// than the average cost of its true-value and false-value by 25% of the
+/// branch-misprediction penalty; this ensures no degradation even with
+/// 25% branch misprediction.
+///
+/// Note: This pass is assumed to run on SSA machine code.
+//===----------------------------------------------------------------------===//
+//
+// External interfaces:
+// FunctionPass *llvm::createX86CmovConverterPass();
+// bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF);
+//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-cmov-converter"
+
+STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
+STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
+STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
+STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");
+
+namespace {
+// This internal switch can be used to turn off the cmov/branch optimization.
+static cl::opt<bool>
+ EnableCmovConverter("x86-cmov-converter",
+ cl::desc("Enable the X86 cmov-to-branch optimization."),
+ cl::init(true), cl::Hidden);
+
+/// Converts X86 cmov instructions into branches when profitable.
+class X86CmovConverterPass : public MachineFunctionPass {
+public:
+ X86CmovConverterPass() : MachineFunctionPass(ID) {}
+ ~X86CmovConverterPass() {}
+
+ StringRef getPassName() const override { return "X86 cmov Conversion"; }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ /// Pass identification, replacement for typeid.
+ static char ID;
+
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ TargetSchedModel TSchedModel;
+
+ /// List of consecutive CMOV instructions.
+ typedef SmallVector<MachineInstr *, 2> CmovGroup;
+ typedef SmallVector<CmovGroup, 2> CmovGroups;
+
+ /// Collect all CMOV-group-candidates in \p CurrLoop and update \p
+ /// CmovInstGroups accordingly.
+ ///
+ /// \param CurrLoop Loop being processed.
+ /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
+ /// \returns true iff it found any CMOV-group-candidate.
+ bool collectCmovCandidates(MachineLoop *CurrLoop, CmovGroups &CmovInstGroups);
+
+ /// Check whether it is profitable to transform each CMOV-group-candidate into
+ /// a branch. Remove all groups that are not profitable from \p CmovInstGroups.
+ ///
+ /// \param CurrLoop Loop being processed.
+ /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
+ /// \returns true iff any CMOV-group-candidate remains.
+ bool checkForProfitableCmovCandidates(MachineLoop *CurrLoop,
+ CmovGroups &CmovInstGroups);
+
+ /// Convert the given list of consecutive CMOV instructions into a branch.
+ ///
+ /// \param Group Consecutive CMOV instructions to be converted into branch.
+ void convertCmovInstsToBranches(SmallVectorImpl<MachineInstr *> &Group) const;
+};
+
+char X86CmovConverterPass::ID = 0;
+
+void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineLoopInfo>();
+}
+
+bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+ if (!EnableCmovConverter)
+ return false;
+
+ DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
+ << "**********\n");
+
+ bool Changed = false;
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ MRI = &MF.getRegInfo();
+ TII = STI.getInstrInfo();
+ TSchedModel.init(STI.getSchedModel(), &STI, TII);
+
+ //===--------------------------------------------------------------------===//
+ // Algorithm
+ // ---------
+ // For each inner most loop
+ // collectCmovCandidates() {
+ // Find all CMOV-group-candidates.
+ // }
+ //
+ // checkForProfitableCmovCandidates() {
+ // * Calculate both loop-depth and optimized-loop-depth.
+ // * Use these depth to check for loop transformation profitability.
+ // * Check for CMOV-group-candidate transformation profitability.
+ // }
+ //
+ // For each profitable CMOV-group-candidate
+ // convertCmovInstsToBranches() {
+ // * Create FalseBB, SinkBB, Conditional branch to SinkBB.
+ // * Replace each CMOV instruction with a PHI instruction in SinkBB.
+ // }
+ //
+ // Note: For more details, see each function description.
+ //===--------------------------------------------------------------------===//
+ for (MachineBasicBlock &MBB : MF) {
+ MachineLoop *CurrLoop = MLI.getLoopFor(&MBB);
+
+ // Optimize only innermost loops.
+ if (!CurrLoop || CurrLoop->getHeader() != &MBB ||
+ !CurrLoop->getSubLoops().empty())
+ continue;
+
+ // List of consecutive CMOV instructions to be processed.
+ CmovGroups CmovInstGroups;
+
+ if (!collectCmovCandidates(CurrLoop, CmovInstGroups))
+ continue;
+
+ if (!checkForProfitableCmovCandidates(CurrLoop, CmovInstGroups))
+ continue;
+
+ Changed = true;
+ for (auto &Group : CmovInstGroups)
+ convertCmovInstsToBranches(Group);
+ }
+ return Changed;
+}
+
+bool X86CmovConverterPass::collectCmovCandidates(MachineLoop *CurrLoop,
+ CmovGroups &CmovInstGroups) {
+ //===--------------------------------------------------------------------===//
+ // Collect all CMOV-group-candidates and add them into CmovInstGroups.
+ //
+ // CMOV-group:
+ // CMOV instructions, in the same MBB, that use the same EFLAGS def instruction.
+ //
+ // CMOV-group-candidate:
+ // CMOV-group where all the CMOV instructions are
+ // 1. consecutive.
+ // 2. have the same condition code or its opposite.
+ // 3. have only register operands (X86::CMOVrr).
+ //===--------------------------------------------------------------------===//
+ // List of possible improvement (TODO's):
+ // --------------------------------------
+ // TODO: Add support for X86::CMOVrm instructions.
+ // TODO: Add support for X86::SETcc instructions.
+ // TODO: Add support for CMOV-groups with non-consecutive CMOV instructions.
+ //===--------------------------------------------------------------------===//
+
+ // Current processed CMOV-Group.
+ CmovGroup Group;
+ for (auto *MBB : CurrLoop->getBlocks()) {
+ Group.clear();
+ // Condition code of the first CMOV instruction in the currently processed
+ // range, and its opposite condition code.
+ X86::CondCode FirstCC, FirstOppCC;
+ // Indicator of a non-CMOVrr instruction in the currently processed range.
+ bool FoundNonCMOVInst = false;
+ // Indicator of whether the currently processed CMOV group should be skipped.
+ bool SkipGroup = false;
+
+ for (auto &I : *MBB) {
+ X86::CondCode CC = X86::getCondFromCMovOpc(I.getOpcode());
+ // Check if we found a X86::CMOVrr instruction.
+ if (CC != X86::COND_INVALID && !I.mayLoad()) {
+ if (Group.empty()) {
+ // We found first CMOV in the range, reset flags.
+ FirstCC = CC;
+ FirstOppCC = X86::GetOppositeBranchCondition(CC);
+ FoundNonCMOVInst = false;
+ SkipGroup = false;
+ }
+ Group.push_back(&I);
+ // Check if it is a non-consecutive CMOV instruction or it has a different
+ // condition code than FirstCC or FirstOppCC.
+ if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
+ // Mark the SkipGroup indicator to skip the currently processed CMOV group.
+ SkipGroup = true;
+ continue;
+ }
+ // If Group is empty, keep looking for first CMOV in the range.
+ if (Group.empty())
+ continue;
+
+ // We found a non X86::CMOVrr instruction.
+ FoundNonCMOVInst = true;
+ // Check if this instruction defines EFLAGS, to determine the end of the
+ // processed range, as there will be no more instructions using the current
+ // EFLAGS def.
+ if (I.definesRegister(X86::EFLAGS)) {
+ // If the currently processed CMOV group should not be skipped, add it as a
+ // CMOV-group-candidate.
+ if (!SkipGroup)
+ CmovInstGroups.push_back(Group);
+ else
+ ++NumOfSkippedCmovGroups;
+ Group.clear();
+ }
+ }
+ // The end of the basic block is considered the end of the range; if the
+ // currently processed CMOV group should not be skipped, add it as a
+ // CMOV-group-candidate.
+ if (Group.empty())
+ continue;
+ if (!SkipGroup)
+ CmovInstGroups.push_back(Group);
+ else
+ ++NumOfSkippedCmovGroups;
+ }
+
+ NumOfCmovGroupCandidate += CmovInstGroups.size();
+ return !CmovInstGroups.empty();
+}
+
+/// \returns Depth of the CMOV instruction as if it were converted into a branch.
+/// \param TrueOpDepth depth cost of CMOV true value operand.
+/// \param FalseOpDepth depth cost of CMOV false value operand.
+static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
+ //===--------------------------------------------------------------------===//
+ // With no info about branch weight, we assume 50% for each value operand.
+ // Thus, depth of optimized CMOV instruction is the rounded up average of
+ // its True-Operand-Value-Depth and False-Operand-Value-Depth.
+ //===--------------------------------------------------------------------===//
+ return (TrueOpDepth + FalseOpDepth + 1) / 2;
+}
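
For example, with TrueOpDepth = 3 and FalseOpDepth = 6, the estimated depth of the converted CMOV is (3 + 6 + 1) / 2 = 5 cycles: a rounded-up 50/50 blend of the two operand depths, since no branch-weight information is available.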
+
+bool X86CmovConverterPass::checkForProfitableCmovCandidates(
+ MachineLoop *CurrLoop, CmovGroups &CmovInstGroups) {
+ struct DepthInfo {
+ /// Depth of original loop.
+ unsigned Depth;
+ /// Depth of optimized loop.
+ unsigned OptDepth;
+ };
+ /// Number of loop iterations over which instruction depth is calculated.
+ static const unsigned LoopIterations = 2;
+ DenseMap<MachineInstr *, DepthInfo> DepthMap;
+ DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
+ enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
+ /// For each register type maps the register to its last def instruction.
+ DenseMap<unsigned, MachineInstr *> RegDefMaps[RegTypeNum];
+ /// Maps register operand to its def instruction, which can be nullptr if it
+ /// is unknown (e.g., operand is defined outside the loop).
+ DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap;
+
+ // Set depth of unknown instruction (i.e., nullptr) to zero.
+ DepthMap[nullptr] = {0, 0};
+
+ SmallPtrSet<MachineInstr *, 4> CmovInstructions;
+ for (auto &Group : CmovInstGroups)
+ CmovInstructions.insert(Group.begin(), Group.end());
+
+ //===--------------------------------------------------------------------===//
+ // Step 1: Calculate instruction depth and loop depth.
+ // Optimized-Loop:
+ // loop with CMOV-group-candidates converted into branches.
+ //
+ // Instruction-Depth:
+ // instruction latency + max operand depth.
+ // * For CMOV instruction in optimized loop the depth is calculated as:
+ // CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-depth)
+ // TODO: Find a better way to estimate the latency of the branch instruction
+ // rather than using the CMOV latency.
+ //
+ // Loop-Depth:
+ // max instruction depth of all instructions in the loop.
+ // Note: instruction with max depth represents the critical-path in the loop.
+ //
+ // Loop-Depth[i]:
+ // Loop-Depth calculated for first `i` iterations.
+ // Note: it is enough to calculate depth for up to two iterations.
+ //
+ // Depth-Diff[i]:
+ // Number of cycles saved in the first `i` iterations by optimizing the loop.
+ //===--------------------------------------------------------------------===//
+ for (unsigned I = 0; I < LoopIterations; ++I) {
+ DepthInfo &MaxDepth = LoopDepth[I];
+ for (auto *MBB : CurrLoop->getBlocks()) {
+ // Clear physical registers Def map.
+ RegDefMaps[PhyRegType].clear();
+ for (MachineInstr &MI : *MBB) {
+ unsigned MIDepth = 0;
+ unsigned MIDepthOpt = 0;
+ bool IsCMOV = CmovInstructions.count(&MI);
+ for (auto &MO : MI.uses()) {
+ // Check for isUse() because uses() also returns implicit definitions.
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ auto &RDM = RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)];
+ if (MachineInstr *DefMI = RDM.lookup(Reg)) {
+ OperandToDefMap[&MO] = DefMI;
+ DepthInfo Info = DepthMap.lookup(DefMI);
+ MIDepth = std::max(MIDepth, Info.Depth);
+ if (!IsCMOV)
+ MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth);
+ }
+ }
+
+ if (IsCMOV)
+ MIDepthOpt = getDepthOfOptCmov(
+ DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth,
+ DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth);
+
+ // Iterate over all operands to handle implicit definitions as well.
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)][Reg] = &MI;
+ }
+
+ unsigned Latency = TSchedModel.computeInstrLatency(&MI);
+ DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency};
+ MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
+ MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
+ }
+ }
+ }
+
+ unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
+ LoopDepth[1].Depth - LoopDepth[1].OptDepth};
+
+ //===--------------------------------------------------------------------===//
+ // Step 2: Check whether the loop is worth optimizing.
+ // Worth-Optimize-Loop:
+ // case 1: Diff[1] == Diff[0]
+ // The critical path is iteration independent - there is no dependency
+ // of critical-path instructions on critical-path instructions of the
+ // previous iteration.
+ // Thus, it is enough to check the gain percentage of the 1st iteration -
+ // to be conservative, the optimized loop needs a per-iteration depth at
+ // least 12.5% smaller than the original loop's.
+ //
+ // case 2: Diff[1] > Diff[0]
+ // The critical path is iteration dependent - there is a dependency of
+ // critical-path instructions on critical-path instructions of the
+ // previous iteration.
+ // Thus, it is required to check the gradient of the gain - the change in
+ // Depth-Diff compared to the change in Loop-Depth between the 1st and
+ // 2nd iterations.
+ // To be conservative, the gradient needs to be at least 50%.
+ //
+ // If the loop is not worth optimizing, remove all CMOV-group-candidates.
+ //===--------------------------------------------------------------------===//
+ bool WorthOptLoop = false;
+ if (Diff[1] == Diff[0])
+ WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
+ else if (Diff[1] > Diff[0])
+ WorthOptLoop =
+ (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth);
+
+ if (!WorthOptLoop)
+ return false;
+
+ ++NumOfLoopCandidate;
+
+ //===--------------------------------------------------------------------===//
+ // Step 3: Check for each CMOV-group-candidate whether it is worth optimizing.
+ // Worth-Optimize-Group:
+ // Iff it is worth optimizing all CMOV instructions in the group.
+ //
+ // Worth-Optimize-CMOV:
+ // A predicted branch is faster than a CMOV by the difference between the
+ // depth of the condition operand and the depth of the taken (predicted)
+ // value operand.
+ // To be conservative, the gain of such a CMOV transformation should cover
+ // at least 25% of the branch-misprediction penalty.
+ //===--------------------------------------------------------------------===//
+ unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
+ CmovGroups TempGroups;
+ std::swap(TempGroups, CmovInstGroups);
+ for (auto &Group : TempGroups) {
+ bool WorthOpGroup = true;
+ for (auto *MI : Group) {
+ // Avoid CMOV instructions whose value is used as a pointer to load from.
+ // This is another conservative check, to avoid converting CMOV instructions
+ // used in tree-search-like algorithms, where the branch is unpredictable.
+ auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
+ if (UIs.begin() != UIs.end() && ++UIs.begin() == UIs.end()) {
+ unsigned Op = UIs.begin()->getOpcode();
+ if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
+ WorthOpGroup = false;
+ break;
+ }
+ }
+
+ unsigned CondCost =
+ DepthMap[OperandToDefMap.lookup(&MI->getOperand(3))].Depth;
+ unsigned ValCost = getDepthOfOptCmov(
+ DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
+ DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
+ if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
+ WorthOpGroup = false;
+ break;
+ }
+ }
+
+ if (WorthOpGroup)
+ CmovInstGroups.push_back(Group);
+ }
+
+ return !CmovInstGroups.empty();
+}
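
To make these thresholds concrete: in the iteration-independent case, Diff[0] == Diff[1] == 4 cycles with LoopDepth[0].Depth == 32 gives 4 * 8 >= 32, so the loop qualifies (exactly the 12.5% per-iteration saving). In the iteration-dependent case, Diff[0] == 2, Diff[1] == 6 and loop depths of 30 and 36 give (6 - 2) * 2 == 8 >= 6, so the 50% gradient test passes. For the per-group check, with the Znver1 MispredictPenalty of 17 defined later in this patch, a CMOV is kept only if its condition depth exceeds the blended value depth by at least 5 cycles, since (CondCost - ValCost) * 4 must reach 17.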
+
+static bool checkEFLAGSLive(MachineInstr *MI) {
+ if (MI->killsRegister(X86::EFLAGS))
+ return false;
+
+ // The EFLAGS operand of MI might be missing a kill marker.
+ // Figure out whether the EFLAGS operand should be live after the MI instruction.
+ MachineBasicBlock *BB = MI->getParent();
+ MachineBasicBlock::iterator ItrMI = MI;
+
+ // Scan forward through BB for a use/def of EFLAGS.
+ for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) {
+ if (I->readsRegister(X86::EFLAGS))
+ return true;
+ if (I->definesRegister(X86::EFLAGS))
+ return false;
+ }
+
+ // We hit the end of the block, check whether EFLAGS is live into a successor.
+ for (auto I = BB->succ_begin(), E = BB->succ_end(); I != E; ++I) {
+ if ((*I)->isLiveIn(X86::EFLAGS))
+ return true;
+ }
+
+ return false;
+}
+
+void X86CmovConverterPass::convertCmovInstsToBranches(
+ SmallVectorImpl<MachineInstr *> &Group) const {
+ assert(!Group.empty() && "No CMOV instructions to convert");
+ ++NumOfOptimizedCmovGroups;
+
+ // To convert a CMOVcc instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+
+ // Before
+ // -----
+ // MBB:
+ // cond = cmp ...
+ // v1 = CMOVge t1, f1, cond
+ // v2 = CMOVlt t2, f2, cond
+ // v3 = CMOVge v1, f3, cond
+ //
+ // After
+ // -----
+ // MBB:
+ // cond = cmp ...
+ // jge %SinkMBB
+ //
+ // FalseMBB:
+ // jmp %SinkMBB
+ //
+ // SinkMBB:
+ // %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
+ // %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
+ // ; true-value with false-value
+ // %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use
+ // ; previous Phi instruction result
+
+ MachineInstr &MI = *Group.front();
+ MachineInstr *LastCMOV = Group.back();
+ DebugLoc DL = MI.getDebugLoc();
+ X86::CondCode CC = X86::CondCode(X86::getCondFromCMovOpc(MI.getOpcode()));
+ X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineFunction::iterator It = ++MBB->getIterator();
+ MachineFunction *F = MBB->getParent();
+ const BasicBlock *BB = MBB->getBasicBlock();
+
+ MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB);
+ F->insert(It, FalseMBB);
+ F->insert(It, SinkMBB);
+
+ // If the EFLAGS register isn't dead in the terminator, then claim that it's
+ // live into the sink and copy blocks.
+ if (checkEFLAGSLive(LastCMOV)) {
+ FalseMBB->addLiveIn(X86::EFLAGS);
+ SinkMBB->addLiveIn(X86::EFLAGS);
+ }
+
+ // Transfer the remainder of BB and its successor edges to SinkMBB.
+ SinkMBB->splice(SinkMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end());
+ SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Add the false and sink blocks as its successors.
+ MBB->addSuccessor(FalseMBB);
+ MBB->addSuccessor(SinkMBB);
+
+ // Create the conditional branch instruction.
+ BuildMI(MBB, DL, TII->get(X86::GetCondBranchFromCond(CC))).addMBB(SinkMBB);
+
+ // Add the sink block to the false block successors.
+ FalseMBB->addSuccessor(SinkMBB);
+
+ MachineInstrBuilder MIB;
+ MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
+ MachineBasicBlock::iterator MIItEnd =
+ std::next(MachineBasicBlock::iterator(LastCMOV));
+ MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
+ // As we are creating the PHIs, we have to be careful if there is more than
+ // one. Later CMOVs may reference the results of earlier CMOVs, but later
+ // PHIs have to reference the individual true/false inputs from earlier PHIs.
+ // That also means that PHI construction must work forward from earlier to
+ // later, and that the code must maintain a mapping from earlier PHI's
+ // destination registers, and the registers that went into the PHI.
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
+
+ for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
+ unsigned DestReg = MIIt->getOperand(0).getReg();
+ unsigned Op1Reg = MIIt->getOperand(1).getReg();
+ unsigned Op2Reg = MIIt->getOperand(2).getReg();
+
+ // If the CMOV we are processing uses the opposite condition from the jump
+ // we generated, then we have to swap the operands for the PHI that is going
+ // to be generated.
+ if (X86::getCondFromCMovOpc(MIIt->getOpcode()) == OppCC)
+ std::swap(Op1Reg, Op2Reg);
+
+ auto Op1Itr = RegRewriteTable.find(Op1Reg);
+ if (Op1Itr != RegRewriteTable.end())
+ Op1Reg = Op1Itr->second.first;
+
+ auto Op2Itr = RegRewriteTable.find(Op2Reg);
+ if (Op2Itr != RegRewriteTable.end())
+ Op2Reg = Op2Itr->second.second;
+
+ // SinkMBB:
+ // %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ]
+ // ...
+ MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
+ .addReg(Op1Reg)
+ .addMBB(FalseMBB)
+ .addReg(Op2Reg)
+ .addMBB(MBB);
+ (void)MIB;
+ DEBUG(dbgs() << "\tFrom: "; MIIt->dump());
+ DEBUG(dbgs() << "\tTo: "; MIB->dump());
+
+ // Add this PHI to the rewrite table.
+ RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
+ }
+
+ // Now remove the CMOV(s).
+ MBB->erase(MIItBegin, MIItEnd);
+}
+
+} // End anonymous namespace.
+
+FunctionPass *llvm::createX86CmovConverterPass() {
+ return new X86CmovConverterPass();
+}
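
Since the converter is registered behind the hidden EnableCmovConverter option shown above (default on), it can be turned off when comparing codegen, e.g. with `llc -x86-cmov-converter=false test.ll`. The flag name comes directly from the cl::opt definition in this file; everything else about that invocation is just an ordinary llc run.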
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ee9e78146305..527e5d568ac6 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1187,7 +1187,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
CC != CallingConv::X86_StdCall &&
CC != CallingConv::X86_ThisCall &&
CC != CallingConv::X86_64_SysV &&
- CC != CallingConv::X86_64_Win64)
+ CC != CallingConv::Win64)
return false;
// Don't handle popping bytes if they don't fit the ret's immediate.
@@ -3171,7 +3171,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
case CallingConv::X86_FastCall:
case CallingConv::X86_StdCall:
case CallingConv::X86_ThisCall:
- case CallingConv::X86_64_Win64:
+ case CallingConv::Win64:
case CallingConv::X86_64_SysV:
break;
}
diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp
index c28746f96439..95c6f2a3fa34 100644
--- a/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/lib/Target/X86/X86FixupBWInsts.cpp
@@ -22,7 +22,7 @@
/// instructions and register-to-register moves. It would
/// seem like cmov(s) would also be affected, but because of the way cmov is
/// really implemented by most machines as reading both the destination and
-/// and source regsters, and then "merging" the two based on a condition,
+/// and source registers, and then "merging" the two based on a condition,
/// it really already should be considered as having a true dependence on the
/// destination register as well.
///
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 65486cf7f529..44eecd664714 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1335,6 +1335,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTTZ, VT, Custom);
}
+ // Non-VLX subtargets extend 128/256-bit vectors to use the 512-bit version.
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, MVT::v4i64,
+ MVT::v8i64}) {
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
+ }
+
// Need to promote to 64-bit even though we have 32-bit masked instructions
// because the IR optimizers rearrange bitcasts around logic ops leaving
// too many variations to handle if we don't promote them.
@@ -1663,10 +1670,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 4;
- // TODO: These control memcmp expansion in CGP and are set low to prevent
- // altering the vector expansion for 16/32 byte memcmp in SelectionDAGBuilder.
- MaxLoadsPerMemcmp = 1;
- MaxLoadsPerMemcmpOptSize = 1;
+ // TODO: These control memcmp expansion in CGP and could be raised higher, but
+ // that needs to be benchmarked and balanced with the potential use of vector
+ // load/store types (PR33329).
+ MaxLoadsPerMemcmp = 4;
+ MaxLoadsPerMemcmpOptSize = 2;
// Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
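
As a rough illustration of the new limits: with MaxLoadsPerMemcmp at 4, a memcmp call on a small constant length can be expanded inline by codegen-prepare into a handful of wide loads and compares instead of a library call, while the OptSize limit of 2 keeps the inline sequence shorter at -Os. The exact expansion shape still depends on the memcmp-expansion logic referenced in the TODO (PR33329).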
@@ -2661,7 +2669,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
// C calling conventions:
case CallingConv::C:
- case CallingConv::X86_64_Win64:
+ case CallingConv::Win64:
case CallingConv::X86_64_SysV:
// Callee pop conventions:
case CallingConv::X86_ThisCall:
@@ -20188,7 +20196,10 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
- auto *C = cast<ConstantSDNode>(ScaleOp);
+ auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
+ // Scale must be constant.
+ if (!C)
+ return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
EVT MaskVT = Mask.getValueType();
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
@@ -20210,7 +20221,10 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
- auto *C = cast<ConstantSDNode>(ScaleOp);
+ auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
+ // Scale must be constant.
+ if (!C)
+ return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
MVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
@@ -20235,7 +20249,10 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
- auto *C = cast<ConstantSDNode>(ScaleOp);
+ auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
+ // Scale must be constant.
+ if (!C)
+ return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
@@ -20254,7 +20271,10 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
- auto *C = cast<ConstantSDNode>(ScaleOp);
+ auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
+ // Scale must be constant.
+ if (!C)
+ return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
@@ -22665,10 +22685,31 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
SDLoc DL(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
+ unsigned Opcode = Op.getOpcode();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+ if (Subtarget.hasAVX512()) {
+ // Attempt to rotate by immediate.
+ APInt UndefElts;
+ SmallVector<APInt, 16> EltBits;
+ if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits)) {
+ if (!UndefElts && llvm::all_of(EltBits, [EltBits](APInt &V) {
+ return EltBits[0] == V;
+ })) {
+ unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
+ uint64_t RotateAmt = EltBits[0].urem(EltSizeInBits);
+ return DAG.getNode(Op, DL, VT, R,
+ DAG.getConstant(RotateAmt, DL, MVT::i8));
+ }
+ }
+
+ // Else, fall-back on VPROLV/VPRORV.
+ return Op;
+ }
assert(VT.isVector() && "Custom lowering only for vector rotates!");
assert(Subtarget.hasXOP() && "XOP support required for vector rotates!");
- assert((Op.getOpcode() == ISD::ROTL) && "Only ROTL supported");
+ assert((Opcode == ISD::ROTL) && "Only ROTL supported");
// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right.
@@ -22683,7 +22724,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
- assert(RotateAmt < VT.getScalarSizeInBits() && "Rotation out of range");
+ assert(RotateAmt < EltSizeInBits && "Rotation out of range");
return DAG.getNode(X86ISD::VPROTI, DL, VT, R,
DAG.getConstant(RotateAmt, DL, MVT::i8));
}
@@ -24030,7 +24071,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG);
case ISD::UMUL_LOHI:
case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
- case ISD::ROTL: return LowerRotate(Op, Subtarget, DAG);
+ case ISD::ROTL:
+ case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
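
Concretely, rotates that reach the DAG as ISD::ROTL/ISD::ROTR can now be custom-lowered on AVX-512 targets either to an immediate rotate (when the amount is a splat constant) or to the variable VPROLV/VPRORV forms. A minimal C++ example of a rotate idiom that optimizing compilers typically turn into such nodes is sketched below; whether a loop over such values actually vectorizes to a vector rotate depends on the vectorizer and target features, so this only illustrates the kind of input the new lowering serves.

```c++
#include <cstdint>

// A 32-bit rotate-left idiom; the masking keeps the shifts well defined
// for k == 0. With a constant k this corresponds to an immediate rotate.
uint32_t rotl32(uint32_t x, unsigned k) {
  k &= 31;
  return (x << k) | (x >> ((32 - k) & 31));
}
```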
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index cc5c09cbf0e5..705d0f7a5cf7 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -1759,29 +1759,29 @@ let Predicates = Preds in {
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2),
NewInf.KRC)>;
-
+
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2),
NewInf.KRC)>;
-
+
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and _.KRCWM:$mask,
+ (_.KVT (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask,
_.RC:$src1, _.RC:$src2),
NewInf.KRC)>;
-
+
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and (_.KVT _.KRCWM:$mask),
- (_.KVT (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert
(_.LdFrag addr:$src2))))))),
(i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
_.RC:$src1, addr:$src2),
NewInf.KRC)>;
}
@@ -1798,7 +1798,7 @@ let Predicates = Preds in {
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2),
NewInf.KRC)>;
-
+
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
@@ -1879,7 +1879,7 @@ defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm,
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
-defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm,
+defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm,
"VPCMPEQQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm,
"VPCMPEQQZ", [HasAVX512]>;
@@ -2127,17 +2127,17 @@ multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo Ne
list<Predicate> Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
imm:$cc)),
(i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
-
+
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)),
(i64 0)),
@@ -2145,37 +2145,37 @@ let Predicates = Preds in {
addr:$src2,
imm:$cc),
NewInf.KRC)>;
-
+
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and _.KRCWM:$mask,
+ (_.KVT (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
- _.RC:$src1,
+ _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
-
+
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (and (_.KVT _.KRCWM:$mask),
- (_.KVT (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert
+ (_.KVT (and (_.KVT _.KRCWM:$mask),
+ (_.KVT (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert
(_.LdFrag addr:$src2))),
imm:$cc)))),
(i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
_.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
}
}
-
+
multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
- list<Predicate> Preds>
+ list<Predicate> Preds>
: avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
@@ -2187,7 +2187,7 @@ let Predicates = Preds in {
addr:$src2,
imm:$cc),
NewInf.KRC)>;
-
+
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
@@ -2447,17 +2447,17 @@ multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo Ne
string InstrStr, list<Predicate> Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (X86cmpm (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
+ (_.KVT (X86cmpm (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
imm:$cc)),
(i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
-
+
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (X86cmpm (_.VT _.RC:$src1),
+ (_.KVT (X86cmpm (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)),
(i64 0)),
@@ -2477,19 +2477,19 @@ let Predicates = Preds in {
NewInf.KRC)>;
}
}
-
+
multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
- string InstrStr, list<Predicate> Preds>
+ string InstrStr, list<Predicate> Preds>
: avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> {
let Predicates = Preds in
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
- (_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
+ (_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
imm:$cc,
(i32 FROUND_NO_EXC))),
(i64 0)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
@@ -2817,16 +2817,16 @@ let Predicates = [HasAVX512] in {
def : Pat<(maskVT (scalar_to_vector GR32:$src)),
(COPY_TO_REGCLASS GR32:$src, maskRC)>;
- def : Pat<(i32 (X86Vextract maskRC:$src, (iPTR 0))),
+ def : Pat<(i32 (X86Vextract maskRC:$src, (iPTR 0))),
(COPY_TO_REGCLASS maskRC:$src, GR32)>;
def : Pat<(maskVT (scalar_to_vector GR8:$src)),
(COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
- def : Pat<(i8 (X86Vextract maskRC:$src, (iPTR 0))),
+ def : Pat<(i8 (X86Vextract maskRC:$src, (iPTR 0))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
- def : Pat<(i32 (anyext (i8 (X86Vextract maskRC:$src, (iPTR 0))))),
+ def : Pat<(i32 (anyext (i8 (X86Vextract maskRC:$src, (iPTR 0))))),
(COPY_TO_REGCLASS maskRC:$src, GR32)>;
}
@@ -3036,7 +3036,7 @@ def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
-def : Pat<(insert_subvector (v16i1 immAllZerosV),
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr)
@@ -3044,8 +3044,8 @@ def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
(i8 8)), (i8 8))>;
-def : Pat<(insert_subvector (v16i1 immAllZerosV),
- (v8i1 (and VK8:$mask,
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (and VK8:$mask,
(OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk)
@@ -3063,7 +3063,7 @@ def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2)
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
-def : Pat<(insert_subvector (v16i1 immAllZerosV),
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri)
@@ -3072,8 +3072,8 @@ def : Pat<(insert_subvector (v16i1 immAllZerosV),
imm:$cc),
(i8 8)), (i8 8))>;
-def : Pat<(insert_subvector (v16i1 immAllZerosV),
- (v8i1 (and VK8:$mask,
+def : Pat<(insert_subvector (v16i1 immAllZerosV),
+ (v8i1 (and VK8:$mask,
(OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik)
@@ -3379,35 +3379,35 @@ defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
HasAVX512>,
avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
- HasAVX512, "VMOVDQA32">,
+ HasAVX512, "VMOVDQA32">,
PD, EVEX_CD8<32, CD8VF>;
defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
HasAVX512>,
avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
- HasAVX512, "VMOVDQA64">,
+ HasAVX512, "VMOVDQA64">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
- HasBWI, "VMOVDQU8">,
+ HasBWI, "VMOVDQU8">,
XD, EVEX_CD8<8, CD8VF>;
defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
- HasBWI, "VMOVDQU16">,
+ HasBWI, "VMOVDQU16">,
XD, VEX_W, EVEX_CD8<16, CD8VF>;
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
null_frag>,
avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
- HasAVX512, "VMOVDQU32">,
+ HasAVX512, "VMOVDQU32">,
XS, EVEX_CD8<32, CD8VF>;
defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
null_frag>,
avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
- HasAVX512, "VMOVDQU64">,
+ HasAVX512, "VMOVDQU64">,
XS, VEX_W, EVEX_CD8<64, CD8VF>;
// Special instructions to help with spilling when we don't have VLX. We need
@@ -3964,49 +3964,49 @@ def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
(COPY_TO_REGCLASS VR128X:$src, FR32X))>;
let hasSideEffects = 0 in {
- def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, FR32X:$src2),
"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], NoItinerary>, XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSSZrr">;
let Constraints = "$src0 = $dst" in
- def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
- (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
+ def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
VR128X:$src1, FR32X:$src2),
"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
[], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSSZrrk">;
-
- def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+
+ def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2),
"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
[], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSSZrrkz">;
- def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, FR64X:$src2),
"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W,
FoldGenData<"VMOVSDZrr">;
let Constraints = "$src0 = $dst" in
- def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
- (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
+ def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
VR128X:$src1, FR64X:$src2),
"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
[], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG,
- VEX_W, FoldGenData<"VMOVSDZrrk">;
+ VEX_W, FoldGenData<"VMOVSDZrrk">;
- def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
- (ins f64x_info.KRCWM:$mask, VR128X:$src1,
+ def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins f64x_info.KRCWM:$mask, VR128X:$src1,
FR64X:$src2),
"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
- [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
+ [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
VEX_W, FoldGenData<"VMOVSDZrrkz">;
}
@@ -5676,6 +5676,109 @@ defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
+
+// Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 and
+// v4i32/v8i32 rotates when VLX is unavailable (NoVLX).
+let Predicates = [HasAVX512, NoVLX] in {
+ def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPROLVQZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
+ (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
+ sub_xmm)>;
+ def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPROLVQZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
+ sub_ymm)>;
+
+ def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
+ (EXTRACT_SUBREG (v16i32
+ (VPROLVDZrr
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
+ (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
+ sub_xmm)>;
+ def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+ (EXTRACT_SUBREG (v16i32
+ (VPROLVDZrr
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
+ sub_ymm)>;
+
+ def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPROLQZri
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
+ imm:$src2)), sub_xmm)>;
+ def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPROLQZri
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ imm:$src2)), sub_ymm)>;
+
+ def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
+ (EXTRACT_SUBREG (v16i32
+ (VPROLDZri
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
+ imm:$src2)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
+ (EXTRACT_SUBREG (v16i32
+ (VPROLDZri
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ imm:$src2)), sub_ymm)>;
+}
+
+// Use the 512-bit VPROR/VPRORI versions to implement v2i64/v4i64 and
+// v4i32/v8i32 rotates when VLX is unavailable (NoVLX).
+let Predicates = [HasAVX512, NoVLX] in {
+ def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPRORVQZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
+ (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
+ sub_xmm)>;
+ def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPRORVQZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
+ sub_ymm)>;
+
+ def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
+ (EXTRACT_SUBREG (v16i32
+ (VPRORVDZrr
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
+ (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
+ sub_xmm)>;
+ def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+ (EXTRACT_SUBREG (v16i32
+ (VPRORVDZrr
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
+ sub_ymm)>;
+
+ def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPRORQZri
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
+ imm:$src2)), sub_xmm)>;
+ def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPRORQZri
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ imm:$src2)), sub_ymm)>;
+
+ def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
+ (EXTRACT_SUBREG (v16i32
+ (VPRORDZri
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
+ imm:$src2)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
+ (EXTRACT_SUBREG (v16i32
+ (VPRORDZri
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ imm:$src2)), sub_ymm)>;
+}
+
//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 7e4cba1c8345..343da2573b55 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -224,7 +224,7 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
const Function *F = MF.getFunction();
- if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64))
+ if (IsWin64 || (F && F->getCallingConv() == CallingConv::Win64))
return &X86::GR64_TCW64RegClass;
else if (Is64Bit)
return &X86::GR64_TCRegClass;
@@ -334,7 +334,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Is64Bit)
return CSR_64_MostRegs_SaveList;
break;
- case CallingConv::X86_64_Win64:
+ case CallingConv::Win64:
if (!HasSSE)
return CSR_Win64_NoSSE_SaveList;
return CSR_Win64_SaveList;
@@ -450,7 +450,7 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (Is64Bit)
return CSR_64_MostRegs_RegMask;
break;
- case CallingConv::X86_64_Win64:
+ case CallingConv::Win64:
return CSR_Win64_RegMask;
case CallingConv::X86_64_SysV:
return CSR_64_RegMask;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index a12fa68faf4f..d831a7974359 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -663,5 +663,6 @@ include "X86ScheduleAtom.td"
include "X86SchedSandyBridge.td"
include "X86SchedHaswell.td"
include "X86ScheduleSLM.td"
+include "X86ScheduleZnver1.td"
include "X86ScheduleBtVer2.td"
diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td
index ed53893b779c..9dcc968a1a7a 100644
--- a/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/lib/Target/X86/X86ScheduleBtVer2.td
@@ -371,6 +371,22 @@ def : WriteRes<WriteFence, [JSAGU]>;
def : WriteRes<WriteNop, []>;
////////////////////////////////////////////////////////////////////////////////
+// SSE4A instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def WriteEXTRQ: SchedWriteRes<[JFPU01]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+def : InstRW<[WriteEXTRQ], (instregex "EXTRQ")>;
+
+def WriteINSERTQ: SchedWriteRes<[JFPU01]> {
+ let Latency = 2;
+ let ResourceCycles = [4];
+}
+def : InstRW<[WriteINSERTQ], (instregex "INSERTQ")>;
+
+////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td
new file mode 100644
index 000000000000..d5b4cfe2ddee
--- /dev/null
+++ b/lib/Target/X86/X86ScheduleZnver1.td
@@ -0,0 +1,223 @@
+//=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Znver1 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def Znver1Model : SchedMachineModel {
+ // Zen can decode 4 instructions per cycle.
+ let IssueWidth = 4;
+  // MicroOpBufferSize is based on the size of the reorder buffer.
+ let MicroOpBufferSize = 192;
+ let LoadLatency = 4;
+ let MispredictPenalty = 17;
+ let HighLatency = 25;
+ let PostRAScheduler = 1;
+
+  // FIXME: The model is incomplete: not all instructions have scheduling
+  // information yet, so CompleteModel is cleared to say so.
+ let CompleteModel = 0;
+}
+
+let SchedModel = Znver1Model in {
+
+// Zen can issue micro-ops to 10 different units in one cycle.
+// These are:
+//  * Four integer ALU units (ZnALU0, ZnALU1, ZnALU2, ZnALU3)
+//  * Two AGU units (ZnAGU0, ZnAGU1)
+//  * Four FPU units (ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3)
+// The AGUs feed the load/store queues, which handle two loads and one store
+// per cycle.
+
+// Four ALU units are defined below
+def ZnALU0 : ProcResource<1>;
+def ZnALU1 : ProcResource<1>;
+def ZnALU2 : ProcResource<1>;
+def ZnALU3 : ProcResource<1>;
+
+// Two AGU units are defined below
+def ZnAGU0 : ProcResource<1>;
+def ZnAGU1 : ProcResource<1>;
+
+// Four FPU units are defined below
+def ZnFPU0 : ProcResource<1>;
+def ZnFPU1 : ProcResource<1>;
+def ZnFPU2 : ProcResource<1>;
+def ZnFPU3 : ProcResource<1>;
+
+// FPU grouping
+def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]>;
+def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>;
+def ZnFPU01 : ProcResGroup<[ZnFPU0, ZnFPU1]>;
+def ZnFPU12 : ProcResGroup<[ZnFPU1, ZnFPU2]>;
+def ZnFPU13 : ProcResGroup<[ZnFPU1, ZnFPU3]>;
+def ZnFPU23 : ProcResGroup<[ZnFPU2, ZnFPU3]>;
+def ZnFPU02 : ProcResGroup<[ZnFPU0, ZnFPU2]>;
+def ZnFPU03 : ProcResGroup<[ZnFPU0, ZnFPU3]>;
+
+// Groupings of the units are defined below.
+// Micro-ops that may issue to more than one unit are modeled with these groups.
+
+// ALU grouping
+// ZnALU03 - 0,3 grouping
+def ZnALU03: ProcResGroup<[ZnALU0, ZnALU3]>;
+
+// 56 Entry (14x4 entries) Int Scheduler
+def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> {
+ let BufferSize=56;
+}
+
+// 28 Entry (14x2) AGU group. AGUs can't be used for all ALU operations,
+// but they are relevant for some instructions.
+def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> {
+ let BufferSize=28;
+}
+
+// Integer Multiplication issued on ALU1.
+def ZnMultiplier : ProcResource<1>;
+
+// Integer division issued on ALU2.
+def ZnDivider : ProcResource<1>;
+
+// The 4-cycle load-to-use latency is modeled with a ReadAdvance.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// A folded load is an instruction that performs a load as part of another
+// operation, e.g. ADDPD xmm, [mem]. Such instructions are usually micro-fused,
+// so they appear as just two micro-ops:
+//   a. the load, and
+//   b. the operation itself (here, addpd).
+// This multiclass handles folded loads for the integer units.
+multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+  // The register variant uses one cycle on the execution port.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+  // The memory variant also uses a cycle on ZnAGU and
+  // adds 4 cycles to the latency.
+ def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
+ let Latency = !add(Lat, 4);
+ }
+}
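+// For example, the "defm : ZnWriteResPair<WriteALU, ZnALU, 1>" below expands to
+// a register form with 1 cycle of latency on ZnALU and a folded-load form that
+// also uses ZnAGU and has 1 + 4 = 5 cycles of latency.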
+
+// This multiclass handles folded loads for the floating point units.
+multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+  // The register variant uses one cycle on the execution port.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+  // The memory variant also uses a cycle on ZnAGU and
+  // adds 7 cycles to the latency.
+ def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
+ let Latency = !add(Lat, 7);
+ }
+}
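+// For example, the "defm : ZnWriteResFpuPair<WriteFAdd, ZnFPU0, 3>" below
+// expands to a register form with 3 cycles of latency on ZnFPU0 and a
+// folded-load form that also uses ZnAGU and has 3 + 7 = 10 cycles of latency.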
+
+// WriteRMW is assigned by codegen to instructions that perform a
+// read-modify-write memory operation.
+def : WriteRes<WriteRMW, [ZnAGU]>;
+
+def : WriteRes<WriteStore, [ZnAGU]>;
+def : WriteRes<WriteMove, [ZnALU]>;
+def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
+
+def : WriteRes<WriteZero, []>;
+def : WriteRes<WriteLEA, [ZnALU]>;
+defm : ZnWriteResPair<WriteALU, ZnALU, 1>;
+defm : ZnWriteResPair<WriteShift, ZnALU, 1>;
+defm : ZnWriteResPair<WriteJump, ZnALU, 1>;
+
+// IDIV
+def : WriteRes<WriteIDiv, [ZnALU2, ZnDivider]> {
+ let Latency = 41;
+ let ResourceCycles = [1, 41];
+}
+
+def : WriteRes<WriteIDivLd, [ZnALU2, ZnAGU, ZnDivider]> {
+ let Latency = 45;
+ let ResourceCycles = [1, 4, 41];
+}
+
+// IMUL
+def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
+ let Latency = 4;
+}
+def : WriteRes<WriteIMul, [ZnALU1, ZnMultiplier]> {
+ let Latency = 4;
+}
+
+def : WriteRes<WriteIMulLd,[ZnALU1, ZnMultiplier]> {
+ let Latency = 8;
+}
+
+// Floating point operations
+defm : ZnWriteResFpuPair<WriteFHAdd, ZnFPU0, 3>;
+defm : ZnWriteResFpuPair<WriteFAdd, ZnFPU0, 3>;
+defm : ZnWriteResFpuPair<WriteFBlend, ZnFPU01, 1>;
+defm : ZnWriteResFpuPair<WriteFVarBlend, ZnFPU01, 1>;
+defm : ZnWriteResFpuPair<WriteVarBlend, ZnFPU0, 1>;
+defm : ZnWriteResFpuPair<WriteCvtI2F, ZnFPU3, 5>;
+defm : ZnWriteResFpuPair<WriteCvtF2F, ZnFPU3, 5>;
+defm : ZnWriteResFpuPair<WriteCvtF2I, ZnFPU3, 5>;
+defm : ZnWriteResFpuPair<WriteFDiv, ZnFPU3, 15>;
+defm : ZnWriteResFpuPair<WriteFShuffle, ZnFPU12, 1>;
+defm : ZnWriteResFpuPair<WriteFMul, ZnFPU0, 5>;
+defm : ZnWriteResFpuPair<WriteFRcp, ZnFPU01, 5>;
+defm : ZnWriteResFpuPair<WriteFRsqrt, ZnFPU01, 5>;
+defm : ZnWriteResFpuPair<WriteFSqrt, ZnFPU3, 20>;
+
+// Vector integer operations, which use the FPU units.
+defm : ZnWriteResFpuPair<WriteVecShift, ZnFPU, 1>;
+defm : ZnWriteResFpuPair<WriteVecLogic, ZnFPU, 1>;
+defm : ZnWriteResFpuPair<WritePHAdd, ZnFPU, 1>;
+defm : ZnWriteResFpuPair<WriteVecALU, ZnFPU, 1>;
+defm : ZnWriteResFpuPair<WriteVecIMul, ZnFPU0, 4>;
+defm : ZnWriteResFpuPair<WriteShuffle, ZnFPU, 1>;
+defm : ZnWriteResFpuPair<WriteBlend, ZnFPU01, 1>;
+defm : ZnWriteResFpuPair<WriteShuffle256, ZnFPU, 2>;
+
+// Vector Shift Operations
+defm : ZnWriteResFpuPair<WriteVarVecShift, ZnFPU12, 1>;
+
+// AES Instructions.
+defm : ZnWriteResFpuPair<WriteAESDecEnc, ZnFPU01, 4>;
+defm : ZnWriteResFpuPair<WriteAESIMC, ZnFPU01, 4>;
+defm : ZnWriteResFpuPair<WriteAESKeyGen, ZnFPU01, 4>;
+
+def : WriteRes<WriteFence, [ZnAGU]>;
+def : WriteRes<WriteNop, []>;
+
+// The following instructions are microcoded. We assign them a long
+// latency (100 cycles) to model them as blocking the entire pipeline.
+defm : ZnWriteResFpuPair<WriteFShuffle256, ZnFPU, 100>;
+
+// Microcoded instructions
+let Latency = 100 in {
+ def : WriteRes<WriteMicrocoded, []>;
+ def : WriteRes<WriteSystem, []>;
+ def : WriteRes<WriteMPSAD, []>;
+ def : WriteRes<WriteMPSADLd, []>;
+ def : WriteRes<WriteCLMul, []>;
+ def : WriteRes<WriteCLMulLd, []>;
+ def : WriteRes<WritePCmpIStrM, []>;
+ def : WriteRes<WritePCmpIStrMLd, []>;
+ def : WriteRes<WritePCmpEStrI, []>;
+ def : WriteRes<WritePCmpEStrILd, []>;
+ def : WriteRes<WritePCmpEStrM, []>;
+ def : WriteRes<WritePCmpEStrMLd, []>;
+ def : WriteRes<WritePCmpIStrI, []>;
+ def : WriteRes<WritePCmpIStrILd, []>;
+ }
+}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index fa0afe29586b..427a0001bef9 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -597,7 +597,7 @@ public:
case CallingConv::Intel_OCL_BI:
return isTargetWin64();
// This convention allows using the Win64 convention on other targets.
- case CallingConv::X86_64_Win64:
+ case CallingConv::Win64:
return true;
// This convention allows using the SysV convention on Windows targets.
case CallingConv::X86_64_SysV:
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 8d891c983fab..08c2cdaefe71 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -375,6 +375,7 @@ bool X86PassConfig::addILPOpts() {
addPass(&EarlyIfConverterID);
if (EnableMachineCombinerPass)
addPass(&MachineCombinerID);
+ addPass(createX86CmovConverterPass());
return true;
}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index aaa6d58bd134..c16207973b39 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -40,6 +40,8 @@ public:
~X86TargetMachine() override;
const X86Subtarget *getSubtargetImpl(const Function &F) const override;
+  // The no-argument getSubtargetImpl, while it exists on some targets, is
+ // deprecated and should not be used.
const X86Subtarget *getSubtargetImpl() const = delete;
TargetIRAnalysis getTargetIRAnalysis() override;
diff --git a/lib/ToolDrivers/CMakeLists.txt b/lib/ToolDrivers/CMakeLists.txt
index ad458450fda3..28da36bba209 100644
--- a/lib/ToolDrivers/CMakeLists.txt
+++ b/lib/ToolDrivers/CMakeLists.txt
@@ -1 +1,2 @@
+add_subdirectory(llvm-dlltool)
add_subdirectory(llvm-lib)
diff --git a/lib/ToolDrivers/LLVMBuild.txt b/lib/ToolDrivers/LLVMBuild.txt
index 7da9a5c01005..a49e04bdf3c1 100644
--- a/lib/ToolDrivers/LLVMBuild.txt
+++ b/lib/ToolDrivers/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = llvm-lib
+subdirectories = llvm-dlltool llvm-lib
[component_0]
type = Group
diff --git a/lib/ToolDrivers/llvm-dlltool/CMakeLists.txt b/lib/ToolDrivers/llvm-dlltool/CMakeLists.txt
new file mode 100644
index 000000000000..52bd5cba86f4
--- /dev/null
+++ b/lib/ToolDrivers/llvm-dlltool/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(LLVM_TARGET_DEFINITIONS Options.td)
+tablegen(LLVM Options.inc -gen-opt-parser-defs)
+add_public_tablegen_target(DllOptionsTableGen)
+
+add_llvm_library(LLVMDlltoolDriver
+ DlltoolDriver.cpp
+ )
+
+add_dependencies(LLVMDlltoolDriver DllOptionsTableGen)
diff --git a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
new file mode 100644
index 000000000000..a7de79306074
--- /dev/null
+++ b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
@@ -0,0 +1,160 @@
+//===- DlltoolDriver.cpp - dlltool.exe-compatible driver ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines an interface to a dlltool.exe-compatible driver.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h"
+#include "llvm/Object/ArchiveWriter.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/COFFImportFile.h"
+#include "llvm/Object/COFFModuleDefinition.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/Path.h"
+
+#include <string>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::COFF;
+
+namespace {
+
+enum {
+ OPT_INVALID = 0,
+#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID,
+#include "Options.inc"
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
+#include "Options.inc"
+#undef PREFIX
+
+static const llvm::opt::OptTable::Info infoTable[] = {
+#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \
+ {X1, X2, X10, X11, OPT_##ID, llvm::opt::Option::KIND##Class, \
+ X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12},
+#include "Options.inc"
+#undef OPTION
+};
+
+class DllOptTable : public llvm::opt::OptTable {
+public:
+ DllOptTable() : OptTable(infoTable, false) {}
+};
+
+} // namespace
+
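+// Buffers returned by openFile() are stored here so that the MemoryBufferRefs
+// handed back to callers stay valid for the lifetime of the driver.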
+std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs;
+
+// Opens a file. Path has to be resolved already.
+// Newly created memory buffers are owned by this driver.
+MemoryBufferRef openFile(StringRef Path) {
+ ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MB = MemoryBuffer::getFile(Path);
+
+ if (std::error_code EC = MB.getError())
+ llvm::errs() << "fail openFile: " << EC.message() << "\n";
+
+ MemoryBufferRef MBRef = MB.get()->getMemBufferRef();
+ OwningMBs.push_back(std::move(MB.get())); // take ownership
+ return MBRef;
+}
+
+static MachineTypes getEmulation(StringRef S) {
+ return StringSwitch<MachineTypes>(S)
+ .Case("i386", IMAGE_FILE_MACHINE_I386)
+ .Case("i386:x86-64", IMAGE_FILE_MACHINE_AMD64)
+ .Case("arm", IMAGE_FILE_MACHINE_ARMNT)
+ .Default(IMAGE_FILE_MACHINE_UNKNOWN);
+}
+
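+// Derives the default import library name from the DLL name,
+// e.g. "foo.dll" becomes "libfoo.a".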
+static std::string getImplibPath(std::string Path) {
+ SmallString<128> Out = StringRef("lib");
+ Out.append(Path);
+ sys::path::replace_extension(Out, ".a");
+ return Out.str();
+}
+
+int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
+ DllOptTable Table;
+ unsigned MissingIndex;
+ unsigned MissingCount;
+ llvm::opt::InputArgList Args =
+ Table.ParseArgs(ArgsArr.slice(1), MissingIndex, MissingCount);
+ if (MissingCount) {
+ llvm::errs() << Args.getArgString(MissingIndex) << ": missing argument\n";
+ return 1;
+ }
+
+ // Handle when no input or output is specified
+ if (Args.hasArgNoClaim(OPT_INPUT) ||
+ (!Args.hasArgNoClaim(OPT_d) && !Args.hasArgNoClaim(OPT_l))) {
+ Table.PrintHelp(outs(), ArgsArr[0], "dlltool", false);
+ llvm::outs() << "\nTARGETS: i386, i386:x86-64, arm\n";
+ return 1;
+ }
+
+ if (!Args.hasArgNoClaim(OPT_m) && Args.hasArgNoClaim(OPT_d)) {
+ llvm::errs() << "error: no target machine specified\n"
+ << "supported targets: i386, i386:x86-64, arm\n";
+ return 1;
+ }
+
+ for (auto *Arg : Args.filtered(OPT_UNKNOWN))
+ llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
+
+ MemoryBufferRef MB;
+ if (auto *Arg = Args.getLastArg(OPT_d))
+ MB = openFile(Arg->getValue());
+
+ if (!MB.getBufferSize()) {
+ llvm::errs() << "definition file empty\n";
+ return 1;
+ }
+
+ COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN;
+ if (auto *Arg = Args.getLastArg(OPT_m))
+ Machine = getEmulation(Arg->getValue());
+
+ if (Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
+ llvm::errs() << "unknown target\n";
+ return 1;
+ }
+
+ Expected<COFFModuleDefinition> Def =
+ parseCOFFModuleDefinition(MB, Machine, true);
+
+ if (!Def) {
+ llvm::errs() << "error parsing definition\n"
+ << errorToErrorCode(Def.takeError()).message();
+ return 1;
+ }
+
+ // Do this after the parser because parseCOFFModuleDefinition sets OutputFile.
+ if (auto *Arg = Args.getLastArg(OPT_D))
+ Def->OutputFile = Arg->getValue();
+
+ if (Def->OutputFile.empty()) {
+ llvm::errs() << "no output file specified\n";
+ return 1;
+ }
+
+ std::string Path = Args.getLastArgValue(OPT_l);
+ if (Path.empty())
+ Path = getImplibPath(Def->OutputFile);
+
+ if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine))
+ return 1;
+ return 0;
+}
diff --git a/lib/ToolDrivers/llvm-dlltool/LLVMBuild.txt b/lib/ToolDrivers/llvm-dlltool/LLVMBuild.txt
new file mode 100644
index 000000000000..11736eb47bcb
--- /dev/null
+++ b/lib/ToolDrivers/llvm-dlltool/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/ToolDrivers/llvm-dlltool/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = DlltoolDriver
+parent = Libraries
+required_libraries = Object Option Support
diff --git a/lib/ToolDrivers/llvm-dlltool/Options.td b/lib/ToolDrivers/llvm-dlltool/Options.td
new file mode 100644
index 000000000000..213c6a4d7674
--- /dev/null
+++ b/lib/ToolDrivers/llvm-dlltool/Options.td
@@ -0,0 +1,26 @@
+include "llvm/Option/OptParser.td"
+
+def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target machine">;
+def m_long : JoinedOrSeparate<["--"], "machine">, Alias<m>;
+
+def l: JoinedOrSeparate<["-"], "l">, HelpText<"Generate an import lib">;
+def l_long : JoinedOrSeparate<["--"], "output-lib">, Alias<l>;
+
+def D: JoinedOrSeparate<["-"], "D">, HelpText<"Specify the input DLL Name">;
+def D_long : JoinedOrSeparate<["--"], "dllname">, Alias<D>;
+
+def d: JoinedOrSeparate<["-"], "d">, HelpText<"Input .def File">;
+def d_long : JoinedOrSeparate<["--"], "input-def">, Alias<d>;
+
+//==============================================================================
+// The flags below do nothing. They are defined only for dlltool compatibility.
+//==============================================================================
+
+def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">;
+def k_alias: Flag<["--"], "kill-at">, Alias<k>;
+
+def S: JoinedOrSeparate<["-"], "S">, HelpText<"Assembler">;
+def S_alias: JoinedOrSeparate<["--"], "as">, Alias<S>;
+
+def f: JoinedOrSeparate<["-"], "f">, HelpText<"Assembler Flags">;
+def f_alias: JoinedOrSeparate<["--"], "as-flags">, Alias<f>;
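+
+// A typical invocation (illustrative only; the file names are made up):
+//   llvm-dlltool -m i386:x86-64 -d foo.def -l libfoo.a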
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 3d57acf06e74..93eab680ca6b 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2026,6 +2026,24 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
continue;
}
+ // LLVM's definition of dominance allows instructions that are cyclic
+ // in unreachable blocks, e.g.:
+ // %pat = select i1 %condition, @global, i16* %pat
+ // because any instruction dominates an instruction in a block that's
+ // not reachable from entry.
+ // So, remove unreachable blocks from the function, because a) there's
+ // no point in analyzing them and b) GlobalOpt should otherwise grow
+ // some more complicated logic to break these cycles.
+    // Removing unreachable blocks might invalidate the dominator tree, so we
+    // recalculate it.
+ if (!F->isDeclaration()) {
+ if (removeUnreachableBlocks(*F)) {
+ auto &DT = LookupDomTree(*F);
+ DT.recalculate(*F);
+ Changed = true;
+ }
+ }
+
Changed |= processGlobal(*F, TLI, LookupDomTree);
if (!F->hasLocalLinkage())
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 00ddb93df830..317770d133b3 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -909,7 +909,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// To check this we also need to nuke any dead constant uses (perhaps
// made dead by this operation on other functions).
Callee.removeDeadConstantUsers();
- if (Callee.use_empty()) {
+ if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
Calls.erase(
std::remove_if(Calls.begin() + i + 1, Calls.end(),
[&Callee](const std::pair<CallSite, int> &Call) {
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index ac4765f96075..6baada2c1ae1 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -173,8 +173,10 @@ protected:
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
bool computeBlockWeights(Function &F);
void findEquivalenceClasses(Function &F);
+ template <bool IsPostDom>
void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
- DominatorTreeBase<BasicBlock> *DomTree);
+ DominatorTreeBase<BasicBlock, IsPostDom> *DomTree);
+
void propagateWeights(Function &F);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
@@ -217,7 +219,7 @@ protected:
/// \brief Dominance, post-dominance and loop information.
std::unique_ptr<DominatorTree> DT;
- std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT;
+ std::unique_ptr<PostDomTreeBase<BasicBlock>> PDT;
std::unique_ptr<LoopInfo> LI;
AssumptionCacheTracker *ACT;
@@ -773,9 +775,10 @@ bool SampleProfileLoader::inlineHotFunctions(
/// \param DomTree Opposite dominator tree. If \p Descendants is filled
/// with blocks from \p BB1's dominator tree, then
/// this is the post-dominator tree, and vice versa.
+template <bool IsPostDom>
void SampleProfileLoader::findEquivalencesFor(
BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
- DominatorTreeBase<BasicBlock> *DomTree) {
+ DominatorTreeBase<BasicBlock, IsPostDom> *DomTree) {
const BasicBlock *EC = EquivalenceClass[BB1];
uint64_t Weight = BlockWeights[EC];
for (const auto *BB2 : Descendants) {
@@ -1283,7 +1286,7 @@ void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
DT.reset(new DominatorTree);
DT->recalculate(F);
- PDT.reset(new DominatorTreeBase<BasicBlock>(true));
+ PDT.reset(new PostDomTreeBase<BasicBlock>());
PDT->recalculate(F);
LI.reset(new LoopInfo);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 773c86e23707..fdc9c373b95e 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1284,6 +1284,16 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *V = SimplifyBSwap(I, Builder))
return replaceInstUsesWith(I, V);
+ if (match(Op1, m_One())) {
+ // (1 << x) & 1 --> zext(x == 0)
+ // (1 >> x) & 1 --> zext(x == 0)
+ Value *X;
+ if (match(Op0, m_OneUse(m_LogicalShift(m_One(), m_Value(X))))) {
+ Value *IsZero = Builder.CreateICmpEQ(X, ConstantInt::get(I.getType(), 0));
+ return new ZExtInst(IsZero, I.getType());
+ }
+ }
+
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt &AndRHSMask = AndRHS->getValue();
@@ -1315,23 +1325,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
break;
}
- case Instruction::Sub:
- // -x & 1 -> x & 1
- if (AndRHSMask.isOneValue() && match(Op0LHS, m_Zero()))
- return BinaryOperator::CreateAnd(Op0RHS, AndRHS);
-
- break;
-
- case Instruction::Shl:
- case Instruction::LShr:
- // (1 << x) & 1 --> zext(x == 0)
- // (1 >> x) & 1 --> zext(x == 0)
- if (AndRHSMask.isOneValue() && Op0LHS == AndRHS) {
- Value *NewICmp =
- Builder.CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
- return new ZExtInst(NewICmp, I.getType());
- }
- break;
}
// ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth
@@ -1417,12 +1410,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
}
- // (A&((~A)|B)) -> A&B
- if (match(Op0, m_c_Or(m_Not(m_Specific(Op1)), m_Value(A))))
- return BinaryOperator::CreateAnd(A, Op1);
- if (match(Op1, m_c_Or(m_Not(m_Specific(Op0)), m_Value(A))))
- return BinaryOperator::CreateAnd(A, Op0);
-
// (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
@@ -2020,18 +2007,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *A, *B;
- // ((~A & B) | A) -> (A | B)
- if (match(Op0, m_c_And(m_Not(m_Specific(Op1)), m_Value(A))))
- return BinaryOperator::CreateOr(A, Op1);
- if (match(Op1, m_c_And(m_Not(m_Specific(Op0)), m_Value(A))))
- return BinaryOperator::CreateOr(Op0, A);
-
- // ((A & B) | ~A) -> (~A | B)
- // The NOT is guaranteed to be in the RHS by complexity ordering.
- if (match(Op1, m_Not(m_Value(A))) &&
- match(Op0, m_c_And(m_Specific(A), m_Value(B))))
- return BinaryOperator::CreateOr(Op1, B);
-
// (A & C)|(B & D)
Value *C = nullptr, *D = nullptr;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
@@ -2176,17 +2151,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateOr(Not, Op0);
}
- // (A & B) | (~A ^ B) -> (~A ^ B)
- // (A & B) | (B ^ ~A) -> (~A ^ B)
- // (B & A) | (~A ^ B) -> (~A ^ B)
- // (B & A) | (B ^ ~A) -> (~A ^ B)
- // The match order is important: match the xor first because the 'not'
- // operation defines 'A'. We do not need to match the xor as Op0 because the
- // xor was canonicalized to Op1 above.
- if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op0, m_c_And(m_Specific(A), m_Specific(B))))
- return BinaryOperator::CreateXor(Builder.CreateNot(A), B);
-
if (SwappedForXor)
std::swap(Op0, Op1);
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 60d1cde971dd..a8faaecb5c34 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1814,9 +1814,21 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType()));
Value *CmpQ =
Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType()));
- auto LogicOpc = Pred == ICmpInst::Predicate::ICMP_EQ ? Instruction::And
- : Instruction::Or;
- return BinaryOperator::Create(LogicOpc, CmpP, CmpQ);
+ auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
+ return BinaryOperator::Create(BOpc, CmpP, CmpQ);
+ }
+
+ // Are we using xors to bitwise check for a pair of (in)equalities? Convert to
+ // a shorter form that has more potential to be folded even further.
+ Value *X1, *X2, *X3, *X4;
+ if (match(Or->getOperand(0), m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
+ match(Or->getOperand(1), m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
+ // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
+ // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
+ Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2);
+ Value *Cmp34 = Builder.CreateICmp(Pred, X3, X4);
+ auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
+ return BinaryOperator::Create(BOpc, Cmp12, Cmp34);
}
return nullptr;
@@ -3737,6 +3749,11 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
const APInt &CVal = CI->getValue();
if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth)
return nullptr;
+ } else {
+ // In this case we could have the operand of the binary operation
+ // being defined in another block, and performing the replacement
+ // could break the dominance relation.
+ return nullptr;
}
} else {
// Other uses prohibit this transformation.
@@ -3856,18 +3873,17 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
assert(BO->getOpcode() == Instruction::And);
// Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
- Value *ShortMask =
- Builder.CreateTrunc(BO->getOperand(1), Builder.getIntNTy(MulWidth));
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
+ APInt ShortMask = CI->getValue().trunc(MulWidth);
Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
- Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType());
- if (auto *ZextI = dyn_cast<Instruction>(Zext))
- IC.Worklist.Add(ZextI);
+ Instruction *Zext =
+ cast<Instruction>(Builder.CreateZExt(ShortAnd, BO->getType()));
+ IC.Worklist.Add(Zext);
IC.replaceInstUsesWith(*BO, Zext);
} else {
llvm_unreachable("Unexpected Binary operation");
}
- if (auto *UI = dyn_cast<Instruction>(U))
- IC.Worklist.Add(UI);
+ IC.Worklist.Add(cast<Instruction>(U));
}
}
if (isa<Instruction>(OtherVal))
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index c59e1ce69ac2..451036545741 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -998,8 +998,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// that this code is not reachable. We do this instead of inserting
// an unreachable instruction directly because we cannot modify the
// CFG.
- new StoreInst(UndefValue::get(LI.getType()),
- Constant::getNullValue(Op->getType()), &LI);
+ StoreInst *SI = new StoreInst(UndefValue::get(LI.getType()),
+ Constant::getNullValue(Op->getType()), &LI);
+ SI->setDebugLoc(LI.getDebugLoc());
return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 5689c0604239..a20f474cbf40 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -417,8 +417,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// the highest demanded bit, we just return the other side.
if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
return I->getOperand(0);
- // We can't do this with the LHS for subtraction.
- if (I->getOpcode() == Instruction::Add &&
+ // We can't do this with the LHS for subtraction, unless we are only
+ // demanding the LSB.
+ if ((I->getOpcode() == Instruction::Add ||
+ DemandedFromOps.isOneValue()) &&
DemandedFromOps.isSubsetOf(LHSKnown.Zero))
return I->getOperand(1);
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 90e232399155..c7766568fd9d 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -636,17 +636,35 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+ Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
+ Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I));
+
// Do "A op C" and "B op C" both simplify?
- if (Value *L =
- SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)))
- if (Value *R =
- SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I))) {
- // They do! Return "L op' R".
- ++NumExpand;
- C = Builder.CreateBinOp(InnerOpcode, L, R);
- C->takeName(&I);
- return C;
- }
+ if (L && R) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ C = Builder.CreateBinOp(InnerOpcode, L, R);
+ C->takeName(&I);
+ return C;
+ }
+
+ // Does "A op C" simplify to the identity value for the inner opcode?
+ if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
+ // They do! Return "B op C".
+ ++NumExpand;
+ C = Builder.CreateBinOp(TopLevelOpcode, B, C);
+ C->takeName(&I);
+ return C;
+ }
+
+ // Does "B op C" simplify to the identity value for the inner opcode?
+ if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
+ // They do! Return "A op C".
+ ++NumExpand;
+ C = Builder.CreateBinOp(TopLevelOpcode, A, C);
+ C->takeName(&I);
+ return C;
+ }
}
if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
@@ -655,17 +673,35 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
+ Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ.getWithInstruction(&I));
+ Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
+
// Do "A op B" and "A op C" both simplify?
- if (Value *L =
- SimplifyBinOp(TopLevelOpcode, A, B, SQ.getWithInstruction(&I)))
- if (Value *R =
- SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I))) {
- // They do! Return "L op' R".
- ++NumExpand;
- A = Builder.CreateBinOp(InnerOpcode, L, R);
- A->takeName(&I);
- return A;
- }
+ if (L && R) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ A = Builder.CreateBinOp(InnerOpcode, L, R);
+ A->takeName(&I);
+ return A;
+ }
+
+ // Does "A op B" simplify to the identity value for the inner opcode?
+ if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
+ // They do! Return "A op C".
+ ++NumExpand;
+ A = Builder.CreateBinOp(TopLevelOpcode, A, C);
+ A->takeName(&I);
+ return A;
+ }
+
+ // Does "A op C" simplify to the identity value for the inner opcode?
+ if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
+ // They do! Return "A op B".
+ ++NumExpand;
+ A = Builder.CreateBinOp(TopLevelOpcode, A, B);
+ A->takeName(&I);
+ return A;
+ }
}
// (op (select (a, c, b)), (select (a, d, b))) -> (select (a, (op c, d), 0))
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 184940b7ea58..057f746e052d 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -22,9 +22,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
@@ -43,6 +45,7 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
@@ -192,6 +195,11 @@ static cl::opt<uint32_t> ClMaxInlinePoisoningSize(
static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
cl::desc("Check stack-use-after-return"),
cl::Hidden, cl::init(true));
+static cl::opt<bool> ClRedzoneByvalArgs("asan-redzone-byval-args",
+ cl::desc("Create redzones for byval "
+ "arguments (extra copy "
+ "required)"), cl::Hidden,
+ cl::init(true));
static cl::opt<bool> ClUseAfterScope("asan-use-after-scope",
cl::desc("Check stack-use-after-scope"),
cl::Hidden, cl::init(false));
@@ -747,6 +755,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
bool runOnFunction() {
if (!ClStack) return false;
+
+ if (ClRedzoneByvalArgs) copyArgsPassedByValToAllocas();
+
// Collect alloca, ret, lifetime instructions etc.
for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB);
@@ -763,6 +774,11 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
return true;
}
+ // Arguments marked with the "byval" attribute are implicitly copied without
+ // using an alloca instruction. To produce redzones for those arguments, we
+ // copy them a second time into memory allocated with an alloca instruction.
+ void copyArgsPassedByValToAllocas();
+
// Finds all Alloca instructions and puts
// poisoned red zones around all of them.
// Then unpoison everything back before the function returns.
@@ -2528,6 +2544,28 @@ static int StackMallocSizeClass(uint64_t LocalStackSize) {
llvm_unreachable("impossible LocalStackSize");
}
+void FunctionStackPoisoner::copyArgsPassedByValToAllocas() {
+ BasicBlock &FirstBB = *F.begin();
+ IRBuilder<> IRB(&FirstBB, FirstBB.getFirstInsertionPt());
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (Argument &Arg : F.args()) {
+ if (Arg.hasByValAttr()) {
+ Type *Ty = Arg.getType()->getPointerElementType();
+ unsigned Align = Arg.getParamAlignment();
+ if (Align == 0) Align = DL.getABITypeAlignment(Ty);
+
+ const std::string &Name = Arg.hasName() ? Arg.getName().str() :
+ "Arg" + llvm::to_string(Arg.getArgNo());
+ AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval");
+ AI->setAlignment(Align);
+ Arg.replaceAllUsesWith(AI);
+
+ uint64_t AllocSize = DL.getTypeAllocSize(Ty);
+ IRB.CreateMemCpy(AI, &Arg, AllocSize, Align);
+ }
+ }
+}
+
PHINode *FunctionStackPoisoner::createPHI(IRBuilder<> &IRB, Value *Cond,
Value *ValueIfTrue,
Instruction *ThenTerm,
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 1348e0ed0ed0..b7c6271869cd 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3039,7 +3039,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
}
void visitVAStartInst(VAStartInst &I) override {
- if (F.getCallingConv() == CallingConv::X86_64_Win64)
+ if (F.getCallingConv() == CallingConv::Win64)
return;
IRBuilder<> IRB(&I);
VAStartInstrumentationList.push_back(&I);
@@ -3053,7 +3053,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
}
void visitVACopyInst(VACopyInst &I) override {
- if (F.getCallingConv() == CallingConv::X86_64_Win64)
+ if (F.getCallingConv() == CallingConv::Win64)
return;
IRBuilder<> IRB(&I);
Value *VAListTag = I.getArgOperand(0);
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index e3c36c98ab0d..06fe07598374 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -281,6 +281,16 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
SanCovTraceSwitchFunction =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy));
+ // Make sure smaller parameters are zero-extended to i64 as required by the
+ // x86_64 ABI.
+ if (TargetTriple.getArch() == Triple::x86_64) {
+ for (int i = 0; i < 3; i++) {
+ SanCovTraceCmpFunction[i]->addParamAttr(0, Attribute::ZExt);
+ SanCovTraceCmpFunction[i]->addParamAttr(1, Attribute::ZExt);
+ }
+ SanCovTraceDivFunction[0]->addParamAttr(0, Attribute::ZExt);
+ }
+
// We insert an empty inline asm after cov callbacks to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 7fd77a082b82..c5c9b2c185d6 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -562,13 +562,27 @@ bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
if (!MSSA)
return false;
+ // If MemorySSA has determined that one of EarlierInst or LaterInst does not
+ // read/write memory, then we can safely return true here.
+ // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
+ // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
+ // by also checking the MemorySSA MemoryAccess on the instruction. Initial
+ // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
+ // with the default optimization pipeline.
+ auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
+ if (!EarlierMA)
+ return true;
+ auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
+ if (!LaterMA)
+ return true;
+
// Since we know LaterDef dominates LaterInst and EarlierInst dominates
// LaterInst, if LaterDef dominates EarlierInst then it can't occur between
// EarlierInst and LaterInst and neither can any other write that potentially
// clobbers LaterInst.
MemoryAccess *LaterDef =
MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
- return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst));
+ return MSSA->dominates(LaterDef, EarlierMA);
}
bool EarlyCSE::processNode(DomTreeNode *Node) {
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0fe72f3f7331..ea28705e684d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1168,6 +1168,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
LI->isVolatile(), LI->getAlignment(),
LI->getOrdering(), LI->getSyncScopeID(),
UnavailablePred->getTerminator());
+ NewLoad->setDebugLoc(LI->getDebugLoc());
// Transfer the old load's AA tags to the new load.
AAMDNodes Tags;
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index a40c22c3fce9..99b4458ea0fa 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -805,6 +805,25 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
ConstantInt *One = ConstantInt::get(IndVarTy, 1);
// TODO: generalize the predicates here to also match their unsigned variants.
if (IsIncreasing) {
+ bool DecreasedRightValueByOne = false;
+ // Try to turn eq/ne predicates to those we can work with.
+ if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
+ // while (++i != len) { while (++i < len) {
+ // ... ---> ...
+ // } }
+ Pred = ICmpInst::ICMP_SLT;
+ else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
+ !CanBeSMin(SE, RightSCEV)) {
+ // while (true) { while (true) {
+ // if (++i == len) ---> if (++i > len - 1)
+ // break; break;
+ // ... ...
+ // } }
+ Pred = ICmpInst::ICMP_SGT;
+ RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType()));
+ DecreasedRightValueByOne = true;
+ }
+
bool FoundExpectedPred =
(Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) ||
(Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0);
@@ -829,16 +848,41 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
return None;
}
- IRBuilder<> B(Preheader->getTerminator());
- RightValue = B.CreateAdd(RightValue, One);
+ // We need to increase the right value unless we have already decreased
+ // it virtually when we replaced EQ with SGT.
+ if (!DecreasedRightValueByOne) {
+ IRBuilder<> B(Preheader->getTerminator());
+ RightValue = B.CreateAdd(RightValue, One);
+ }
} else {
if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SLT, IndVarStart,
RightSCEV)) {
FailureReason = "Induction variable start not bounded by upper limit";
return None;
}
+ assert(!DecreasedRightValueByOne &&
+ "Right value can be decreased only for LatchBrExitIdx == 0!");
}
} else {
+ bool IncreasedRightValueByOne = false;
+ // Try to turn eq/ne predicates to those we can work with.
+ if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1)
+ // while (--i != len) { while (--i > len) {
+ // ... ---> ...
+ // } }
+ Pred = ICmpInst::ICMP_SGT;
+ else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 &&
+ !CanBeSMax(SE, RightSCEV)) {
+ // while (true) { while (true) {
+ // if (--i == len) ---> if (--i < len + 1)
+ // break; break;
+ // ... ...
+ // } }
+ Pred = ICmpInst::ICMP_SLT;
+ RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType()));
+ IncreasedRightValueByOne = true;
+ }
+
bool FoundExpectedPred =
(Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) ||
(Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0);
@@ -863,14 +907,20 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
return None;
}
- IRBuilder<> B(Preheader->getTerminator());
- RightValue = B.CreateSub(RightValue, One);
+ // We need to decrease the right value unless we have already increased
+ // it virtually when we replaced EQ with SLT.
+ if (!IncreasedRightValueByOne) {
+ IRBuilder<> B(Preheader->getTerminator());
+ RightValue = B.CreateSub(RightValue, One);
+ }
} else {
if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SGT, IndVarStart,
RightSCEV)) {
FailureReason = "Induction variable start not bounded by lower limit";
return None;
}
+ assert(!IncreasedRightValueByOne &&
+ "Right value can be increased only for LatchBrExitIdx == 0!");
}
}
@@ -922,14 +972,18 @@ LoopConstrainer::calculateSubRanges() const {
bool Increasing = MainLoopStructure.IndVarIncreasing;
- // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the
- // range of values the induction variable takes.
+ // We compute `Smallest` and `Greatest` such that [Smallest, Greatest), or
+ // [Smallest, GreatestSeen] is the range of values the induction variable
+ // takes.
- const SCEV *Smallest = nullptr, *Greatest = nullptr;
+ const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr;
+ const SCEV *One = SE.getOne(Ty);
if (Increasing) {
Smallest = Start;
Greatest = End;
+ // No overflow, because the range [Smallest, GreatestSeen] is not empty.
+ GreatestSeen = SE.getMinusSCEV(End, One);
} else {
// These two computations may sign-overflow. Here is why that is okay:
//
@@ -947,9 +1001,9 @@ LoopConstrainer::calculateSubRanges() const {
// will be an empty range. Returning an empty range is always safe.
//
- const SCEV *One = SE.getOne(Ty);
Smallest = SE.getAddExpr(End, One);
Greatest = SE.getAddExpr(Start, One);
+ GreatestSeen = Start;
}
auto Clamp = [this, Smallest, Greatest](const SCEV *S) {
@@ -964,7 +1018,7 @@ LoopConstrainer::calculateSubRanges() const {
Result.LowLimit = Clamp(Range.getBegin());
bool ProvablyNoPostLoop =
- SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd());
+ SE.isKnownPredicate(ICmpInst::ICMP_SLT, GreatestSeen, Range.getEnd());
if (!ProvablyNoPostLoop)
Result.HighLimit = Clamp(Range.getEnd());
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index ee3de51b1360..4056cc5cb346 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -2168,11 +2168,19 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
return false;
}
-/// TryToUnfoldSelectInCurrBB - Look for PHI/Select in the same BB of the form
+/// TryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the
+/// same BB of the form
/// bb:
/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
-/// %s = select p, trueval, falseval
+/// %s = select %p, trueval, falseval
///
+/// or
+///
+/// bb:
+/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...
+/// %c = cmp %p, 0
+/// %s = select %c, trueval, falseval
+///
/// And expand the select into a branch structure. This later enables
/// jump-threading over bb in this pass.
///
@@ -2186,44 +2194,54 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
if (LoopHeaders.count(BB))
return false;
- // Look for a Phi/Select pair in the same basic block. The Phi feeds the
- // condition of the Select and at least one of the incoming values is a
- // constant.
for (BasicBlock::iterator BI = BB->begin();
PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
- unsigned NumPHIValues = PN->getNumIncomingValues();
- if (NumPHIValues == 0 || !PN->hasOneUse())
- continue;
-
- SelectInst *SI = dyn_cast<SelectInst>(PN->user_back());
- if (!SI || SI->getParent() != BB)
- continue;
-
- Value *Cond = SI->getCondition();
- if (!Cond || Cond != PN || !Cond->getType()->isIntegerTy(1))
+ // Look for a Phi having at least one constant incoming value.
+ if (llvm::all_of(PN->incoming_values(),
+ [](Value *V) { return !isa<ConstantInt>(V); }))
continue;
- bool HasConst = false;
- for (unsigned i = 0; i != NumPHIValues; ++i) {
- if (PN->getIncomingBlock(i) == BB)
+ auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {
+      // Check whether SI is in BB and uses V as its condition.
+ if (SI->getParent() != BB)
return false;
- if (isa<ConstantInt>(PN->getIncomingValue(i)))
- HasConst = true;
- }
+ Value *Cond = SI->getCondition();
+ return (Cond && Cond == V && Cond->getType()->isIntegerTy(1));
+ };
- if (HasConst) {
- // Expand the select.
- TerminatorInst *Term =
- SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
- PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
- NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
- NewPN->addIncoming(SI->getFalseValue(), BB);
- SI->replaceAllUsesWith(NewPN);
- SI->eraseFromParent();
- return true;
+ SelectInst *SI = nullptr;
+ for (Use &U : PN->uses()) {
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
+        // Look for an ICmp in BB that compares PN with a constant and is the
+        // condition of a Select.
+ if (Cmp->getParent() == BB && Cmp->hasOneUse() &&
+ isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))
+ if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))
+ if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {
+ SI = SelectI;
+ break;
+ }
+ } else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {
+        // Look for a Select in BB that uses PN as its condition.
+ if (isUnfoldCandidate(SelectI, U.get())) {
+ SI = SelectI;
+ break;
+ }
+ }
}
+
+ if (!SI)
+ continue;
+ // Expand the select.
+ TerminatorInst *Term =
+ SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
+ PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
+ NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
+ NewPN->addIncoming(SI->getFalseValue(), BB);
+ SI->replaceAllUsesWith(NewPN);
+ SI->eraseFromParent();
+ return true;
}
-
return false;
}
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
index 606136dc31a4..2e0d8e0374c0 100644
--- a/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -323,9 +324,10 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
class LoopInterchangeLegality {
public:
LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
- LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA)
+ LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA,
+ OptimizationRemarkEmitter *ORE)
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
- PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {}
+ PreserveLCSSA(PreserveLCSSA), ORE(ORE), InnerLoopHasReduction(false) {}
/// Check if the loops can be interchanged.
bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
@@ -353,6 +355,8 @@ private:
LoopInfo *LI;
DominatorTree *DT;
bool PreserveLCSSA;
+ /// Interface to emit optimization remarks.
+ OptimizationRemarkEmitter *ORE;
bool InnerLoopHasReduction;
};
@@ -361,8 +365,9 @@ private:
/// loop.
class LoopInterchangeProfitability {
public:
- LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE)
- : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {}
+ LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
+ OptimizationRemarkEmitter *ORE)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
/// Check if the loop interchange is profitable.
bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
@@ -376,6 +381,8 @@ private:
/// Scev analysis.
ScalarEvolution *SE;
+ /// Interface to emit optimization remarks.
+ OptimizationRemarkEmitter *ORE;
};
/// LoopInterchangeTransform interchanges the loop.
@@ -422,6 +429,9 @@ struct LoopInterchange : public FunctionPass {
DependenceInfo *DI;
DominatorTree *DT;
bool PreserveLCSSA;
+ /// Interface to emit optimization remarks.
+ OptimizationRemarkEmitter *ORE;
+
LoopInterchange()
: FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) {
initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
@@ -435,6 +445,7 @@ struct LoopInterchange : public FunctionPass {
AU.addRequired<DependenceAnalysisWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
bool runOnFunction(Function &F) override {
@@ -446,6 +457,7 @@ struct LoopInterchange : public FunctionPass {
DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
// Build up a worklist of loop pairs to analyze.
@@ -575,18 +587,23 @@ struct LoopInterchange : public FunctionPass {
Loop *OuterLoop = LoopList[OuterLoopId];
LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT,
- PreserveLCSSA);
+ PreserveLCSSA, ORE);
if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");
return false;
}
DEBUG(dbgs() << "Loops are legal to interchange\n");
- LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE);
+ LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
DEBUG(dbgs() << "Interchanging loops not profitable\n");
return false;
}
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "Interchanged",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Loop interchanged with enclosing loop.");
+
LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
LoopNestExit, LIL.hasInnerLoopReduction());
LIT.transform();
@@ -760,6 +777,12 @@ bool LoopInterchangeLegality::currentLimitations() {
if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) {
DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "UnsupportedPHIInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Only inner loops with induction or reduction PHI nodes can be"
+              " interchanged currently.");
return true;
}
@@ -767,6 +790,12 @@ bool LoopInterchangeLegality::currentLimitations() {
if (Inductions.size() != 1) {
DEBUG(dbgs() << "We currently only support loops with 1 induction variable."
<< "Failed to interchange due to current limitation\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "MultiInductionInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Only inner loops with 1 induction variable can be "
+ "interchanged currently.");
return true;
}
if (Reductions.size() > 0)
@@ -777,6 +806,12 @@ bool LoopInterchangeLegality::currentLimitations() {
if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) {
DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "UnsupportedPHIOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+ << "Only outer loops with induction or reduction PHI nodes can be"
+ " interchanged currently.");
return true;
}
@@ -785,18 +820,35 @@ bool LoopInterchangeLegality::currentLimitations() {
if (!Reductions.empty()) {
DEBUG(dbgs() << "Outer loops with reductions are not supported "
<< "currently.\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "ReductionsOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+              << "Outer loops with reductions cannot be interchanged "
+ "currently.");
return true;
}
// TODO: Currently we handle only loops with 1 induction variable.
if (Inductions.size() != 1) {
DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
<< "supported currently.\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "MultiIndutionOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+ << "Only outer loops with 1 induction variable can be "
+ "interchanged currently.");
return true;
}
// TODO: Triangular loops are not handled for now.
if (!isLoopStructureUnderstood(InnerInductionVar)) {
DEBUG(dbgs() << "Loop structure not understood by pass\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "UnsupportedStructureInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Inner loop structure not understood currently.");
return true;
}
@@ -805,12 +857,24 @@ bool LoopInterchangeLegality::currentLimitations() {
getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader);
if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) {
DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "NoLCSSAPHIOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+              << "Only outer loops with LCSSA PHIs can be interchanged "
+ "currently.");
return true;
}
LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader);
if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) {
DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "NoLCSSAPHIOuterInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+              << "Only inner loops with LCSSA PHIs can be interchanged "
+ "currently.");
return true;
}
@@ -835,6 +899,11 @@ bool LoopInterchangeLegality::currentLimitations() {
if (!InnerIndexVarInc) {
DEBUG(dbgs() << "Did not find an instruction to increment the induction "
<< "variable.\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "NoIncrementInInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "The inner loop does not increment the induction variable.");
return true;
}
@@ -852,6 +921,12 @@ bool LoopInterchangeLegality::currentLimitations() {
if (!I.isIdenticalTo(InnerIndexVarInc)) {
DEBUG(dbgs() << "Found unsupported instructions between induction "
<< "variable increment and branch.\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "UnsupportedInsBetweenInduction",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Found unsupported instruction between induction variable "
+ "increment and branch.");
return true;
}
@@ -862,6 +937,11 @@ bool LoopInterchangeLegality::currentLimitations() {
// current limitation.
if (!FoundInduction) {
DEBUG(dbgs() << "Did not find the induction variable.\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "NoIndutionVariable",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Did not find the induction variable.");
return true;
}
return false;
@@ -875,6 +955,11 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
<< " and OuterLoopId = " << OuterLoopId
<< " due to dependence\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "Dependence",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Cannot interchange loops due to dependences.");
return false;
}
@@ -910,6 +995,12 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
// Check if the loops are tightly nested.
if (!tightlyNested(OuterLoop, InnerLoop)) {
DEBUG(dbgs() << "Loops not tightly nested\n");
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "NotTightlyNested",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Cannot interchange loops because they are not tightly "
+ "nested.");
return false;
}
@@ -1005,9 +1096,18 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
// It is not profitable as per current cache profitability model. But check if
// we can move this loop outside to improve parallelism.
- bool ImprovesPar =
- isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
- return ImprovesPar;
+ if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
+ return true;
+
+ ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE,
+ "InterchangeNotProfitable",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Interchanging loops is too costly (cost="
+ << ore::NV("Cost", Cost) << ", threshold="
+ << ore::NV("Threshold", LoopInterchangeCostThreshold) <<
+ ") and it does not improve parallelism.");
+ return false;
}
void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
@@ -1291,6 +1391,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
"Interchanges loops for cache reuse", false, false)
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 9397b87cdf56..90c5c243f464 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -68,6 +68,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
@@ -90,16 +91,10 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
/// If it contains any dynamic allocas, returns false.
static bool canTRE(Function &F) {
// Because of PR962, we don't TRE dynamic allocas.
- for (auto &BB : F) {
- for (auto &I : BB) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
- if (!AI->isStaticAlloca())
- return false;
- }
- }
- }
-
- return true;
+ return llvm::all_of(instructions(F), [](Instruction &I) {
+ auto *AI = dyn_cast<AllocaInst>(&I);
+ return !AI || AI->isStaticAlloca();
+ });
}
namespace {
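The canTRE rewrite above uses the llvm::all_of / instructions(F) idiom: a single range predicate replaces the nested block/instruction loops. A minimal sketch of the same idiom applied to a different, purely illustrative property follows; the helper name and the "direct calls only" check are hypothetical and not part of this patch.

    // Assumes llvm/ADT/STLExtras.h, llvm/IR/InstIterator.h and
    // llvm/IR/Instructions.h are available, as in the file above.
    static bool hasOnlyDirectCalls(Function &F) {
      // instructions(F) walks every instruction in every block of F;
      // all_of returns true only if the predicate holds for each one.
      return llvm::all_of(instructions(F), [](Instruction &I) {
        auto *CI = dyn_cast<CallInst>(&I);
        // Non-call instructions pass trivially; calls must resolve to a
        // statically known callee (getCalledFunction() is null for
        // indirect calls).
        return !CI || CI->getCalledFunction() != nullptr;
      });
    }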
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 5170c68e2915..d43ce7abb7cd 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -22,6 +22,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
@@ -736,7 +737,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// remainder are connected to the original Loop's exit blocks. The remaining
// work is to update the phi nodes in the original loop, and take in the
// values from the cloned region. Also update the dominator info for
- // OtherExits, since we have new edges into OtherExits.
+ // OtherExits and their immediate successors, since we have new edges into
+ // OtherExits.
+ SmallSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;
for (auto *BB : OtherExits) {
for (auto &II : *BB) {
@@ -759,12 +762,35 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
}
}
+#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
+ for (BasicBlock *SuccBB : successors(BB)) {
+ assert(!(any_of(OtherExits,
+ [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
+ SuccBB == LatchExit) &&
+ "Breaks the definition of dedicated exits!");
+ }
+#endif
// Update the dominator info because the immediate dominator is no longer the
// header of the original Loop. BB has edges both from L and remainder code.
// Since the preheader determines which loop is run (L or directly jump to
// the remainder code), we set the immediate dominator as the preheader.
- if (DT)
+ if (DT) {
DT->changeImmediateDominator(BB, PreHeader);
+ // Also update the IDom for immediate successors of BB. If the current
+ // IDom is the header, update the IDom to be the preheader because that is
+ // the nearest common dominator of all predecessors of SuccBB. We need to
+ // check for IDom being the header because successors of exit blocks can
+ // have edges from outside the loop, and we should not incorrectly update
+ // the IDom in that case.
+ for (BasicBlock *SuccBB: successors(BB))
+ if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) {
+ if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) {
+ assert(!SuccBB->getSinglePredecessor() &&
+ "BB should be the IDom then!");
+ DT->changeImmediateDominator(SuccBB, PreHeader);
+ }
+ }
+ }
}
// Loop structure should be the following:
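For the dominator-tree update a few hunks above, a rough picture of the situation it handles (block names are illustrative):

            Preheader
            /        \
        Header      RemainderEntry (cloned)
          ...           ...
            \           /
             ExitBB   (one of OtherExits, now reached from both copies)
               |
            ExitSucc

ExitBB can no longer be dominated by Header, so its immediate dominator is reset to Preheader. Any ExitSucc whose IDom was still Header is reset the same way, because Preheader is now the nearest common dominator of its predecessors; successors that already had edges from outside the loop keep their IDom, which is what the IDom == Header test guards against.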
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index eb82ee283d44..012b10c8a9b0 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -574,11 +574,9 @@ protected:
/// Returns (and creates if needed) the trip count of the widened loop.
Value *getOrCreateVectorTripCount(Loop *NewLoop);
- /// Emit a bypass check to see if the trip count would overflow, or we
- /// wouldn't have enough iterations to execute one vector loop.
+ /// Emit a bypass check to see if the vector trip count is zero, including if
+ /// it overflows.
void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass);
- /// Emit a bypass check to see if the vector trip count is nonzero.
- void emitVectorLoopEnteredCheck(Loop *L, BasicBlock *Bypass);
/// Emit a bypass check to see if all of the SCEV assumptions we've
/// had to make are correct.
void emitSCEVChecks(Loop *L, BasicBlock *Bypass);
@@ -3289,37 +3287,16 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
BasicBlock *BB = L->getLoopPreheader();
IRBuilder<> Builder(BB->getTerminator());
- // Generate code to check that the loop's trip count that we computed by
- // adding one to the backedge-taken count will not overflow.
- Value *CheckMinIters = Builder.CreateICmpULT(
- Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");
+ // Generate code to check if the loop's trip count is less than VF * UF, or
+ // equal to it in case a scalar epilogue is required; this implies that the
+ // vector trip count is zero. This check also covers the case where adding one
+ // to the backedge-taken count overflowed leading to an incorrect trip count
+ // of zero. In this case we will also jump to the scalar loop.
+ auto P = Legal->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE
+ : ICmpInst::ICMP_ULT;
+ Value *CheckMinIters = Builder.CreateICmp(
+ P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");
- BasicBlock *NewBB =
- BB->splitBasicBlock(BB->getTerminator(), "min.iters.checked");
- // Update dominator tree immediately if the generated block is a
- // LoopBypassBlock because SCEV expansions to generate loop bypass
- // checks may query it before the current function is finished.
- DT->addNewBlock(NewBB, BB);
- if (L->getParentLoop())
- L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
- ReplaceInstWithInst(BB->getTerminator(),
- BranchInst::Create(Bypass, NewBB, CheckMinIters));
- LoopBypassBlocks.push_back(BB);
-}
-
-void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L,
- BasicBlock *Bypass) {
- Value *TC = getOrCreateVectorTripCount(L);
- BasicBlock *BB = L->getLoopPreheader();
- IRBuilder<> Builder(BB->getTerminator());
-
- // Now, compare the new count to zero. If it is zero skip the vector loop and
- // jump to the scalar loop.
- Value *Cmp = Builder.CreateICmpEQ(TC, Constant::getNullValue(TC->getType()),
- "cmp.zero");
-
- // Generate code to check that the loop's trip count that we computed by
- // adding one to the backedge-taken count will not overflow.
BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
// Update dominator tree immediately if the generated block is a
// LoopBypassBlock because SCEV expansions to generate loop bypass
@@ -3328,7 +3305,7 @@ void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L,
if (L->getParentLoop())
L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
ReplaceInstWithInst(BB->getTerminator(),
- BranchInst::Create(Bypass, NewBB, Cmp));
+ BranchInst::Create(Bypass, NewBB, CheckMinIters));
LoopBypassBlocks.push_back(BB);
}
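Read concretely, with illustrative numbers: for VF = 4 and UF = 2 the new check compares the trip count against 8. When no scalar epilogue is required, ICMP_ULT sends counts 0 through 7 to the scalar loop and lets a count of 8 enter the vector body; when an epilogue is required, ICMP_ULE also sends a count of exactly 8 to the scalar loop, since the vector body would otherwise leave no iterations for the epilogue. A backedge-taken count of uint##_max makes the computed trip count wrap to 0, which fails the check as well, so the overflow case previously handled by the separate check is still covered.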
@@ -3477,14 +3454,13 @@ void InnerLoopVectorizer::createVectorizedLoopSkeleton() {
Value *StartIdx = ConstantInt::get(IdxTy, 0);
- // We need to test whether the backedge-taken count is uint##_max. Adding one
- // to it will cause overflow and an incorrect loop trip count in the vector
- // body. In case of overflow we want to directly jump to the scalar remainder
- // loop.
- emitMinimumIterationCountCheck(Lp, ScalarPH);
// Now, compare the new count to zero. If it is zero skip the vector loop and
- // jump to the scalar loop.
- emitVectorLoopEnteredCheck(Lp, ScalarPH);
+ // jump to the scalar loop. This check also covers the case where the
+ // backedge-taken count is uint##_max: adding one to it will overflow leading
+ // to an incorrect trip count of zero. In this (rare) case we will also jump
+ // to the scalar loop.
+ emitMinimumIterationCountCheck(Lp, ScalarPH);
+
// Generate the code to check any assumptions that we've made for SCEV
// expressions.
emitSCEVChecks(Lp, ScalarPH);
@@ -3527,7 +3503,7 @@ void InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// We know what the end value is.
EndValue = CountRoundDown;
} else {
- IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
+ IRBuilder<> B(Lp->getLoopPreheader()->getTerminator());
Type *StepType = II.getStep()->getType();
Instruction::CastOps CastOp =
CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
@@ -4168,7 +4144,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// To do so, we need to generate the 'identity' vector and override
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
- Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
+ Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
// This is the vector-clone of the value that leaves the loop.
Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType();
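The two insertion-point changes above (LoopBypassBlocks.back() and LoopBypassBlocks[1] giving way to the loop preheader) appear to be a consequence of dropping emitVectorLoopEnteredCheck: with a single minimum-iteration check there is no longer a dedicated "min.iters.checked" bypass block, so code that must execute immediately before entering the vector body is now anchored on the vector preheader itself.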
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4425043ad39a..dcbcab459a6b 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -434,7 +434,7 @@ private:
/// \returns the pointer to the vectorized value if \p VL is already
/// vectorized, or NULL. They may happen in cycles.
- Value *alreadyVectorized(ArrayRef<Value *> VL) const;
+ Value *alreadyVectorized(ArrayRef<Value *> VL, Value *OpValue) const;
/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars.
@@ -857,7 +857,7 @@ private:
/// Checks if a bundle of instructions can be scheduled, i.e. has no
/// cyclic dependencies. This is only a dry-run, no instructions are
/// actually moved at this stage.
- bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP);
+ bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP, Value *OpValue);
/// Un-bundles a group of instructions.
void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
@@ -1212,7 +1212,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check that all of the users of the scalars that we want to vectorize are
// schedulable.
Instruction *VL0 = cast<Instruction>(VL[0]);
- BasicBlock *BB = cast<Instruction>(VL0)->getParent();
+ BasicBlock *BB = VL0->getParent();
if (!DT->isReachableFromEntry(BB)) {
// Don't go into unreachable blocks. They may contain instructions with
@@ -1237,7 +1237,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
BlockScheduling &BS = *BSRef.get();
- if (!BS.tryScheduleBundle(VL, this)) {
+ if (!BS.tryScheduleBundle(VL, this, VL0)) {
DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
assert((!BS.getScheduleData(VL[0]) ||
!BS.getScheduleData(VL[0])->isPartOfBundle()) &&
@@ -2427,8 +2427,8 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
return Vec;
}
-Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
- if (const TreeEntry *En = getTreeEntry(VL[0])) {
+Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL, Value *OpValue) const {
+ if (const TreeEntry *En = getTreeEntry(OpValue)) {
if (En->isSame(VL) && En->VectorizedValue)
return En->VectorizedValue;
}
@@ -2553,7 +2553,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *InVec = vectorizeTree(INVL);
- if (Value *V = alreadyVectorized(E->Scalars))
+ if (Value *V = alreadyVectorized(E->Scalars, VL0))
return V;
CastInst *CI = dyn_cast<CastInst>(VL0);
@@ -2575,7 +2575,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *L = vectorizeTree(LHSV);
Value *R = vectorizeTree(RHSV);
- if (Value *V = alreadyVectorized(E->Scalars))
+ if (Value *V = alreadyVectorized(E->Scalars, VL0))
return V;
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
@@ -2604,7 +2604,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *True = vectorizeTree(TrueVec);
Value *False = vectorizeTree(FalseVec);
- if (Value *V = alreadyVectorized(E->Scalars))
+ if (Value *V = alreadyVectorized(E->Scalars, VL0))
return V;
Value *V = Builder.CreateSelect(Cond, True, False);
@@ -2644,7 +2644,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *LHS = vectorizeTree(LHSVL);
Value *RHS = vectorizeTree(RHSVL);
- if (Value *V = alreadyVectorized(E->Scalars))
+ if (Value *V = alreadyVectorized(E->Scalars, VL0))
return V;
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
@@ -2806,7 +2806,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *LHS = vectorizeTree(LHSVL);
Value *RHS = vectorizeTree(RHSVL);
- if (Value *V = alreadyVectorized(E->Scalars))
+ if (Value *V = alreadyVectorized(E->Scalars, VL0))
return V;
// Create a vector of LHS op1 RHS
@@ -3097,8 +3097,8 @@ void BoUpSLP::optimizeGatherSequence() {
// Groups the instructions to a bundle (which is then a single scheduling entity)
// and schedules instructions until the bundle gets ready.
bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
- BoUpSLP *SLP) {
- if (isa<PHINode>(VL[0]))
+ BoUpSLP *SLP, Value *OpValue) {
+ if (isa<PHINode>(OpValue))
return true;
// Initialize the instruction bundle.
@@ -3106,7 +3106,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
ScheduleData *PrevInBundle = nullptr;
ScheduleData *Bundle = nullptr;
bool ReSchedule = false;
- DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n");
+ DEBUG(dbgs() << "SLP: bundle: " << *OpValue << "\n");
// Make sure that the scheduling region contains all
// instructions of the bundle.
@@ -3177,7 +3177,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
}
}
if (!Bundle->isReady()) {
- cancelScheduling(VL, VL[0]);
+ cancelScheduling(VL, OpValue);
return false;
}
return true;